1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  net/dccp/proto.c
4  *
5  *  An implementation of the DCCP protocol
6  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7  */
8
9 #include <linux/dccp.h>
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/if_arp.h>
18 #include <linux/init.h>
19 #include <linux/random.h>
20 #include <linux/slab.h>
21 #include <net/checksum.h>
22
23 #include <net/inet_sock.h>
24 #include <net/inet_common.h>
25 #include <net/sock.h>
26 #include <net/xfrm.h>
27
28 #include <asm/ioctls.h>
29 #include <linux/spinlock.h>
30 #include <linux/timer.h>
31 #include <linux/delay.h>
32 #include <linux/poll.h>
33
34 #include "ccid.h"
35 #include "dccp.h"
36 #include "feat.h"
37
38 #define CREATE_TRACE_POINTS
39 #include "trace.h"
40
41 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42
43 EXPORT_SYMBOL_GPL(dccp_statistics);
44
45 DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
46 EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);
47
48 struct inet_hashinfo dccp_hashinfo;
49 EXPORT_SYMBOL_GPL(dccp_hashinfo);
50
51 /* The maximum TX queue length, in packets (0 = no limit). */
52 int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
55 static const char *dccp_state_name(const int state)
56 {
57         static const char *const dccp_state_names[] = {
58         [DCCP_OPEN]             = "OPEN",
59         [DCCP_REQUESTING]       = "REQUESTING",
60         [DCCP_PARTOPEN]         = "PARTOPEN",
61         [DCCP_LISTEN]           = "LISTEN",
62         [DCCP_RESPOND]          = "RESPOND",
63         [DCCP_CLOSING]          = "CLOSING",
64         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
65         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
66         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67         [DCCP_TIME_WAIT]        = "TIME_WAIT",
68         [DCCP_CLOSED]           = "CLOSED",
69         };
70
71         if (state >= DCCP_MAX_STATES)
72                 return "INVALID STATE!";
73         else
74                 return dccp_state_names[state];
75 }
76 #endif
77
78 void dccp_set_state(struct sock *sk, const int state)
79 {
80         const int oldstate = sk->sk_state;
81
82         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
83                       dccp_state_name(oldstate), dccp_state_name(state));
84         WARN_ON(state == oldstate);
85
86         switch (state) {
87         case DCCP_OPEN:
88                 if (oldstate != DCCP_OPEN)
89                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90                 /* Client retransmits all Confirm options until entering OPEN */
91                 if (oldstate == DCCP_PARTOPEN)
92                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93                 break;
94
95         case DCCP_CLOSED:
96                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97                     oldstate == DCCP_CLOSING)
98                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100                 sk->sk_prot->unhash(sk);
101                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103                         inet_put_port(sk);
104                 fallthrough;
105         default:
106                 if (oldstate == DCCP_OPEN)
107                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108         }
109
110         /* Change state AFTER socket is unhashed to avoid closed
111          * socket sitting in hash tables.
112          */
113         inet_sk_set_state(sk, state);
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 void dccp_destruct_common(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180 }
181 EXPORT_SYMBOL_GPL(dccp_destruct_common);
182
183 static void dccp_sk_destruct(struct sock *sk)
184 {
185         dccp_destruct_common(sk);
186         inet_sock_destruct(sk);
187 }
188
189 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
190 {
191         struct dccp_sock *dp = dccp_sk(sk);
192         struct inet_connection_sock *icsk = inet_csk(sk);
193
194         pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
195                      "please contact the netdev mailing list\n");
196
197         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
198         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
199         sk->sk_state            = DCCP_CLOSED;
200         sk->sk_write_space      = dccp_write_space;
201         sk->sk_destruct         = dccp_sk_destruct;
202         icsk->icsk_sync_mss     = dccp_sync_mss;
203         dp->dccps_mss_cache     = 536;
204         dp->dccps_rate_last     = jiffies;
205         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
206         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
207         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
208
209         dccp_init_xmit_timers(sk);
210
211         INIT_LIST_HEAD(&dp->dccps_featneg);
212         /* control socket doesn't need feat nego */
213         if (likely(ctl_sock_initialized))
214                 return dccp_feat_init(sk);
215         return 0;
216 }
217
218 EXPORT_SYMBOL_GPL(dccp_init_sock);
219
220 void dccp_destroy_sock(struct sock *sk)
221 {
222         struct dccp_sock *dp = dccp_sk(sk);
223
224         __skb_queue_purge(&sk->sk_write_queue);
225         if (sk->sk_send_head != NULL) {
226                 kfree_skb(sk->sk_send_head);
227                 sk->sk_send_head = NULL;
228         }
229
230         /* Clean up a referenced DCCP bind bucket. */
231         if (inet_csk(sk)->icsk_bind_hash != NULL)
232                 inet_put_port(sk);
233
234         kfree(dp->dccps_service_list);
235         dp->dccps_service_list = NULL;
236
237         if (dp->dccps_hc_rx_ackvec != NULL) {
238                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
239                 dp->dccps_hc_rx_ackvec = NULL;
240         }
241         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
242         dp->dccps_hc_rx_ccid = NULL;
243
244         /* clean up feature negotiation state */
245         dccp_feat_list_purge(&dp->dccps_featneg);
246 }
247
248 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
249
250 static inline int dccp_need_reset(int state)
251 {
252         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
253                state != DCCP_REQUESTING;
254 }
255
256 int dccp_disconnect(struct sock *sk, int flags)
257 {
258         struct inet_connection_sock *icsk = inet_csk(sk);
259         struct inet_sock *inet = inet_sk(sk);
260         struct dccp_sock *dp = dccp_sk(sk);
261         const int old_state = sk->sk_state;
262
263         if (old_state != DCCP_CLOSED)
264                 dccp_set_state(sk, DCCP_CLOSED);
265
266         /*
267          * This corresponds to the ABORT function of RFC793, sec. 3.8
268          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
269          */
270         if (old_state == DCCP_LISTEN) {
271                 inet_csk_listen_stop(sk);
272         } else if (dccp_need_reset(old_state)) {
273                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
274                 sk->sk_err = ECONNRESET;
275         } else if (old_state == DCCP_REQUESTING)
276                 sk->sk_err = ECONNRESET;
277
278         dccp_clear_xmit_timers(sk);
279         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
280         dp->dccps_hc_rx_ccid = NULL;
281
282         __skb_queue_purge(&sk->sk_receive_queue);
283         __skb_queue_purge(&sk->sk_write_queue);
284         if (sk->sk_send_head != NULL) {
285                 __kfree_skb(sk->sk_send_head);
286                 sk->sk_send_head = NULL;
287         }
288
289         inet->inet_dport = 0;
290
291         inet_bhash2_reset_saddr(sk);
292
293         sk->sk_shutdown = 0;
294         sock_reset_flag(sk, SOCK_DONE);
295
296         icsk->icsk_backoff = 0;
297         inet_csk_delack_init(sk);
298         __sk_dst_reset(sk);
299
300         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
301
302         sk_error_report(sk);
303         return 0;
304 }
305
306 EXPORT_SYMBOL_GPL(dccp_disconnect);
307
308 /*
309  *      Wait for a DCCP event.
310  *
311  *      Note that we don't need to lock the socket, as the upper poll layers
312  *      take care of normal races (between the test and the event) and we don't
313  *      go look at any of the socket buffers directly.
314  */
315 __poll_t dccp_poll(struct file *file, struct socket *sock,
316                        poll_table *wait)
317 {
318         __poll_t mask;
319         struct sock *sk = sock->sk;
320
321         sock_poll_wait(file, sock, wait);
322         if (sk->sk_state == DCCP_LISTEN)
323                 return inet_csk_listen_poll(sk);
324
325         /* Socket is not locked. We are protected from async events
326            by the poll logic, and correct handling of state changes
327            made by other threads is impossible in any case.
328          */
329
330         mask = 0;
331         if (sk->sk_err)
332                 mask = EPOLLERR;
333
334         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
335                 mask |= EPOLLHUP;
336         if (sk->sk_shutdown & RCV_SHUTDOWN)
337                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
338
339         /* Connected? */
340         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
341                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
342                         mask |= EPOLLIN | EPOLLRDNORM;
343
344                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
345                         if (sk_stream_is_writeable(sk)) {
346                                 mask |= EPOLLOUT | EPOLLWRNORM;
347                         } else {  /* send SIGIO later */
348                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
349                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
350
351                                 /* Race breaker. If space is freed after
352                                  * wspace test but before the flags are set,
353                                  * IO signal will be lost.
354                                  */
355                                 if (sk_stream_is_writeable(sk))
356                                         mask |= EPOLLOUT | EPOLLWRNORM;
357                         }
358                 }
359         }
360         return mask;
361 }
362
363 EXPORT_SYMBOL_GPL(dccp_poll);
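/*
 * Editorial sketch (userspace, not part of this file): how the EPOLL*
 * bits computed above surface through poll(2). "dccp_fd" and the two
 * helpers are placeholders.
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = dccp_fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 5000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			read_one_packet(dccp_fd);
 *		if (pfd.revents & POLLHUP)
 *			handle_peer_gone(dccp_fd);
 *	}
 */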
364
365 int dccp_ioctl(struct sock *sk, int cmd, int *karg)
366 {
367         int rc = -ENOTCONN;
368
369         lock_sock(sk);
370
371         if (sk->sk_state == DCCP_LISTEN)
372                 goto out;
373
374         switch (cmd) {
375         case SIOCOUTQ: {
376                 *karg = sk_wmem_alloc_get(sk);
377                 /* Using sk_wmem_alloc here because sk_wmem_queued is not
378                  * used by DCCP and is always 0, as with UDP.
379                  */
380
381                 rc = 0;
382         }
383                 break;
384         case SIOCINQ: {
385                 struct sk_buff *skb;
386                 *karg = 0;
387
388                 skb = skb_peek(&sk->sk_receive_queue);
389                 if (skb != NULL) {
390                         /*
391                          * We will only return the amount of this packet since
392                          * that is all that will be read.
393                          */
394                         *karg = skb->len;
395                 }
396                 rc = 0;
397         }
398                 break;
399         default:
400                 rc = -ENOIOCTLCMD;
401                 break;
402         }
403 out:
404         release_sock(sk);
405         return rc;
406 }
407
408 EXPORT_SYMBOL_GPL(dccp_ioctl);
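/*
 * Editorial sketch (userspace): querying the two queue sizes handled
 * above. SIOCINQ reports only the packet at the head of the receive
 * queue, since one recv(2) consumes at most one packet.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int inq = 0, outq = 0;
 *
 *	ioctl(dccp_fd, SIOCINQ, &inq);		(bytes in the next packet)
 *	ioctl(dccp_fd, SIOCOUTQ, &outq);	(bytes queued for transmission)
 */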
409
410 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
411                                    sockptr_t optval, unsigned int optlen)
412 {
413         struct dccp_sock *dp = dccp_sk(sk);
414         struct dccp_service_list *sl = NULL;
415
416         if (service == DCCP_SERVICE_INVALID_VALUE ||
417             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
418                 return -EINVAL;
419
420         if (optlen > sizeof(service)) {
421                 sl = kmalloc(optlen, GFP_KERNEL);
422                 if (sl == NULL)
423                         return -ENOMEM;
424
425                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
426                 if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
427                                 sizeof(service), optlen - sizeof(service)) ||
428                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
429                         kfree(sl);
430                         return -EFAULT;
431                 }
432         }
433
434         lock_sock(sk);
435         dp->dccps_service = service;
436
437         kfree(dp->dccps_service_list);
438
439         dp->dccps_service_list = sl;
440         release_sock(sk);
441         return 0;
442 }
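/*
 * Editorial sketch (userspace): setting the service code handled
 * above. Passing a single __be32 sets dccps_service only; a larger
 * buffer additionally installs the service list. The value 42 is
 * purely illustrative.
 *
 *	#include <linux/dccp.h>
 *
 *	__be32 service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 */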
443
444 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
445 {
446         u8 *list, len;
447         int i, rc;
448
449         if (cscov < 0 || cscov > 15)
450                 return -EINVAL;
451         /*
452          * Populate a list of permissible values, in the range cscov...15. This
453          * is necessary since feature negotiation of single values only works if
454          * both sides incidentally choose the same value. Since the list starts
455          * lowest-value first, negotiation will pick the smallest shared value.
456          */
457         if (cscov == 0)
458                 return 0;
459         len = 16 - cscov;
460
461         list = kmalloc(len, GFP_KERNEL);
462         if (list == NULL)
463                 return -ENOBUFS;
464
465         for (i = 0; i < len; i++)
466                 list[i] = cscov++;
467
468         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
469
470         if (rc == 0) {
471                 if (rx)
472                         dccp_sk(sk)->dccps_pcrlen = cscov;
473                 else
474                         dccp_sk(sk)->dccps_pcslen = cscov;
475         }
476         kfree(list);
477         return rc;
478 }
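/*
 * Worked example: for cscov = 12, len = 16 - 12 = 4 and the loop above
 * builds the preference list { 12, 13, 14, 15 }, so the endpoints can
 * converge on any coverage value >= 12. From userspace this would be
 * requested as (editorial sketch):
 *
 *	int cscov = 12;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 */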
479
480 static int dccp_setsockopt_ccid(struct sock *sk, int type,
481                                 sockptr_t optval, unsigned int optlen)
482 {
483         u8 *val;
484         int rc = 0;
485
486         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
487                 return -EINVAL;
488
489         val = memdup_sockptr(optval, optlen);
490         if (IS_ERR(val))
491                 return PTR_ERR(val);
492
493         lock_sock(sk);
494         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
495                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
496
497         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
498                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
499         release_sock(sk);
500
501         kfree(val);
502         return rc;
503 }
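/*
 * Editorial sketch (userspace): the CCID options above take a
 * preference-ordered list of one or more CCIDs. A single byte selects
 * exactly one, e.g. CCID-2 (TCP-like congestion control), provided
 * that CCID is built into the kernel.
 *
 *	#include <linux/dccp.h>
 *
 *	__u8 ccid = 2;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_TX_CCID,
 *		   &ccid, sizeof(ccid));
 */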
504
505 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
506                 sockptr_t optval, unsigned int optlen)
507 {
508         struct dccp_sock *dp = dccp_sk(sk);
509         int val, err = 0;
510
511         switch (optname) {
512         case DCCP_SOCKOPT_PACKET_SIZE:
513                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
514                 return 0;
515         case DCCP_SOCKOPT_CHANGE_L:
516         case DCCP_SOCKOPT_CHANGE_R:
517                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
518                 return 0;
519         case DCCP_SOCKOPT_CCID:
520         case DCCP_SOCKOPT_RX_CCID:
521         case DCCP_SOCKOPT_TX_CCID:
522                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
523         }
524
525         if (optlen < (int)sizeof(int))
526                 return -EINVAL;
527
528         if (copy_from_sockptr(&val, optval, sizeof(int)))
529                 return -EFAULT;
530
531         if (optname == DCCP_SOCKOPT_SERVICE)
532                 return dccp_setsockopt_service(sk, val, optval, optlen);
533
534         lock_sock(sk);
535         switch (optname) {
536         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
537                 if (dp->dccps_role != DCCP_ROLE_SERVER)
538                         err = -EOPNOTSUPP;
539                 else
540                         dp->dccps_server_timewait = (val != 0);
541                 break;
542         case DCCP_SOCKOPT_SEND_CSCOV:
543                 err = dccp_setsockopt_cscov(sk, val, false);
544                 break;
545         case DCCP_SOCKOPT_RECV_CSCOV:
546                 err = dccp_setsockopt_cscov(sk, val, true);
547                 break;
548         case DCCP_SOCKOPT_QPOLICY_ID:
549                 if (sk->sk_state != DCCP_CLOSED)
550                         err = -EISCONN;
551                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
552                         err = -EINVAL;
553                 else
554                         dp->dccps_qpolicy = val;
555                 break;
556         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
557                 if (val < 0)
558                         err = -EINVAL;
559                 else
560                         dp->dccps_tx_qlen = val;
561                 break;
562         default:
563                 err = -ENOPROTOOPT;
564                 break;
565         }
566         release_sock(sk);
567
568         return err;
569 }
570
571 int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
572                     unsigned int optlen)
573 {
574         if (level != SOL_DCCP)
575                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
576                                                              optname, optval,
577                                                              optlen);
578         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
579 }
580
581 EXPORT_SYMBOL_GPL(dccp_setsockopt);
582
583 static int dccp_getsockopt_service(struct sock *sk, int len,
584                                    __be32 __user *optval,
585                                    int __user *optlen)
586 {
587         const struct dccp_sock *dp = dccp_sk(sk);
588         const struct dccp_service_list *sl;
589         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
590
591         lock_sock(sk);
592         if ((sl = dp->dccps_service_list) != NULL) {
593                 slen = sl->dccpsl_nr * sizeof(u32);
594                 total_len += slen;
595         }
596
597         err = -EINVAL;
598         if (total_len > len)
599                 goto out;
600
601         err = 0;
602         if (put_user(total_len, optlen) ||
603             put_user(dp->dccps_service, optval) ||
604             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
605                 err = -EFAULT;
606 out:
607         release_sock(sk);
608         return err;
609 }
610
611 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
612                     char __user *optval, int __user *optlen)
613 {
614         struct dccp_sock *dp;
615         int val, len;
616
617         if (get_user(len, optlen))
618                 return -EFAULT;
619
620         if (len < (int)sizeof(int))
621                 return -EINVAL;
622
623         dp = dccp_sk(sk);
624
625         switch (optname) {
626         case DCCP_SOCKOPT_PACKET_SIZE:
627                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
628                 return 0;
629         case DCCP_SOCKOPT_SERVICE:
630                 return dccp_getsockopt_service(sk, len,
631                                                (__be32 __user *)optval, optlen);
632         case DCCP_SOCKOPT_GET_CUR_MPS:
633                 val = READ_ONCE(dp->dccps_mss_cache);
634                 break;
635         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
636                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
637         case DCCP_SOCKOPT_TX_CCID:
638                 val = ccid_get_current_tx_ccid(dp);
639                 if (val < 0)
640                         return -ENOPROTOOPT;
641                 break;
642         case DCCP_SOCKOPT_RX_CCID:
643                 val = ccid_get_current_rx_ccid(dp);
644                 if (val < 0)
645                         return -ENOPROTOOPT;
646                 break;
647         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
648                 val = dp->dccps_server_timewait;
649                 break;
650         case DCCP_SOCKOPT_SEND_CSCOV:
651                 val = dp->dccps_pcslen;
652                 break;
653         case DCCP_SOCKOPT_RECV_CSCOV:
654                 val = dp->dccps_pcrlen;
655                 break;
656         case DCCP_SOCKOPT_QPOLICY_ID:
657                 val = dp->dccps_qpolicy;
658                 break;
659         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
660                 val = dp->dccps_tx_qlen;
661                 break;
662         case 128 ... 191:
663                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
664                                              len, (u32 __user *)optval, optlen);
665         case 192 ... 255:
666                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
667                                              len, (u32 __user *)optval, optlen);
668         default:
669                 return -ENOPROTOOPT;
670         }
671
672         len = sizeof(val);
673         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
674                 return -EFAULT;
675
676         return 0;
677 }
678
679 int dccp_getsockopt(struct sock *sk, int level, int optname,
680                     char __user *optval, int __user *optlen)
681 {
682         if (level != SOL_DCCP)
683                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
684                                                              optname, optval,
685                                                              optlen);
686         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
687 }
688
689 EXPORT_SYMBOL_GPL(dccp_getsockopt);
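/*
 * Editorial sketch (userspace): reading the current maximum packet
 * size via the DCCP_SOCKOPT_GET_CUR_MPS branch above; dccp_sendmsg()
 * rejects anything larger with -EMSGSIZE.
 *
 *	#include <linux/dccp.h>
 *
 *	int mps;
 *	socklen_t len = sizeof(mps);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
 */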
690
691 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
692 {
693         struct cmsghdr *cmsg;
694
695         /*
696          * Assign an (opaque) qpolicy priority value to skb->priority.
697          *
698          * We are overloading this skb field for use with the qpolicy subsystem.
699          * The skb->priority is normally used for the SO_PRIORITY option, which
700          * is initialised from sk_priority. Since the assignment of sk_priority
701          * to skb->priority happens later (on layer 3), we overload this field
702          * for use with queueing priorities as long as the skb is on layer 4.
703          * The default priority value (if nothing is set) is 0.
704          */
705         skb->priority = 0;
706
707         for_each_cmsghdr(cmsg, msg) {
708                 if (!CMSG_OK(msg, cmsg))
709                         return -EINVAL;
710
711                 if (cmsg->cmsg_level != SOL_DCCP)
712                         continue;
713
714                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
715                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
716                         return -EINVAL;
717
718                 switch (cmsg->cmsg_type) {
719                 case DCCP_SCM_PRIORITY:
720                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
721                                 return -EINVAL;
722                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
723                         break;
724                 default:
725                         return -EINVAL;
726                 }
727         }
728         return 0;
729 }
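/*
 * Editorial sketch (userspace): passing a per-packet priority through
 * the cmsg interface parsed above. This assumes the socket was first
 * switched to the priority queueing policy (DCCP_SOCKOPT_QPOLICY_ID
 * set to DCCPQ_POLICY_PRIO, assuming that constant is exported by
 * <linux/dccp.h>); otherwise dccp_qpolicy_param_ok() rejects the cmsg
 * with -EINVAL.
 *
 *	char cbuf[CMSG_SPACE(sizeof(__u32))] = { 0 };
 *	struct iovec iov = { .iov_base = data, .iov_len = data_len };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_DCCP;
 *	cmsg->cmsg_type  = DCCP_SCM_PRIORITY;
 *	cmsg->cmsg_len   = CMSG_LEN(sizeof(__u32));
 *	*(__u32 *)CMSG_DATA(cmsg) = 7;	(opaque priority value)
 *
 *	sendmsg(fd, &msg, 0);
 */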
730
731 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
732 {
733         const struct dccp_sock *dp = dccp_sk(sk);
734         const int flags = msg->msg_flags;
735         const int noblock = flags & MSG_DONTWAIT;
736         struct sk_buff *skb;
737         int rc, size;
738         long timeo;
739
740         trace_dccp_probe(sk, len);
741
742         if (len > READ_ONCE(dp->dccps_mss_cache))
743                 return -EMSGSIZE;
744
745         lock_sock(sk);
746
747         timeo = sock_sndtimeo(sk, noblock);
748
749         /*
750          * We have to use sk_stream_wait_connect here to set sk_write_pending,
751          * so that the trick in dccp_rcv_request_sent_state_process works.
752          */
753         /* Wait for a connection to finish. */
754         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
755                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
756                         goto out_release;
757
758         size = sk->sk_prot->max_header + len;
759         release_sock(sk);
760         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
761         lock_sock(sk);
762         if (skb == NULL)
763                 goto out_release;
764
765         if (dccp_qpolicy_full(sk)) {
766                 rc = -EAGAIN;
767                 goto out_discard;
768         }
769
770         if (sk->sk_state == DCCP_CLOSED) {
771                 rc = -ENOTCONN;
772                 goto out_discard;
773         }
774
775         /* We need to check dccps_mss_cache after socket is locked. */
776         if (len > dp->dccps_mss_cache) {
777                 rc = -EMSGSIZE;
778                 goto out_discard;
779         }
780
781         skb_reserve(skb, sk->sk_prot->max_header);
782         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
783         if (rc != 0)
784                 goto out_discard;
785
786         rc = dccp_msghdr_parse(msg, skb);
787         if (rc != 0)
788                 goto out_discard;
789
790         dccp_qpolicy_push(sk, skb);
791         /*
792          * The xmit_timer is set if the TX CCID is rate-based and will expire
793          * when congestion control permits to release further packets into the
794          * network. Window-based CCIDs do not use this timer.
795          */
796         if (!timer_pending(&dp->dccps_xmit_timer))
797                 dccp_write_xmit(sk);
798 out_release:
799         release_sock(sk);
800         return rc ? : len;
801 out_discard:
802         kfree_skb(skb);
803         goto out_release;
804 }
805
806 EXPORT_SYMBOL_GPL(dccp_sendmsg);
807
808 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
809                  int *addr_len)
810 {
811         const struct dccp_hdr *dh;
812         long timeo;
813
814         lock_sock(sk);
815
816         if (sk->sk_state == DCCP_LISTEN) {
817                 len = -ENOTCONN;
818                 goto out;
819         }
820
821         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
822
823         do {
824                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
825
826                 if (skb == NULL)
827                         goto verify_sock_status;
828
829                 dh = dccp_hdr(skb);
830
831                 switch (dh->dccph_type) {
832                 case DCCP_PKT_DATA:
833                 case DCCP_PKT_DATAACK:
834                         goto found_ok_skb;
835
836                 case DCCP_PKT_CLOSE:
837                 case DCCP_PKT_CLOSEREQ:
838                         if (!(flags & MSG_PEEK))
839                                 dccp_finish_passive_close(sk);
840                         fallthrough;
841                 case DCCP_PKT_RESET:
842                         dccp_pr_debug("found fin (%s) ok!\n",
843                                       dccp_packet_name(dh->dccph_type));
844                         len = 0;
845                         goto found_fin_ok;
846                 default:
847                         dccp_pr_debug("packet_type=%s\n",
848                                       dccp_packet_name(dh->dccph_type));
849                         sk_eat_skb(sk, skb);
850                 }
851 verify_sock_status:
852                 if (sock_flag(sk, SOCK_DONE)) {
853                         len = 0;
854                         break;
855                 }
856
857                 if (sk->sk_err) {
858                         len = sock_error(sk);
859                         break;
860                 }
861
862                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
863                         len = 0;
864                         break;
865                 }
866
867                 if (sk->sk_state == DCCP_CLOSED) {
868                         if (!sock_flag(sk, SOCK_DONE)) {
869                                 /* This occurs when the user tries to read
870                                  * from a never-connected socket.
871                                  */
872                                 len = -ENOTCONN;
873                                 break;
874                         }
875                         len = 0;
876                         break;
877                 }
878
879                 if (!timeo) {
880                         len = -EAGAIN;
881                         break;
882                 }
883
884                 if (signal_pending(current)) {
885                         len = sock_intr_errno(timeo);
886                         break;
887                 }
888
889                 sk_wait_data(sk, &timeo, NULL);
890                 continue;
891         found_ok_skb:
892                 if (len > skb->len)
893                         len = skb->len;
894                 else if (len < skb->len)
895                         msg->msg_flags |= MSG_TRUNC;
896
897                 if (skb_copy_datagram_msg(skb, 0, msg, len)) {
898                         /* Exception. Bailout! */
899                         len = -EFAULT;
900                         break;
901                 }
902                 if (flags & MSG_TRUNC)
903                         len = skb->len;
904         found_fin_ok:
905                 if (!(flags & MSG_PEEK))
906                         sk_eat_skb(sk, skb);
907                 break;
908         } while (1);
909 out:
910         release_sock(sk);
911         return len;
912 }
913
914 EXPORT_SYMBOL_GPL(dccp_recvmsg);
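/*
 * Editorial sketch (userspace): DCCP preserves packet boundaries, so a
 * short buffer truncates the packet and sets MSG_TRUNC, exactly as in
 * the found_ok_skb path above; a return of 0 can also mean a received
 * Close/CloseReq/Reset rather than "no data".
 *
 *	char buf[1500];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *
 *	if (n == 0)
 *		handle_connection_teardown(fd);	(placeholder helper)
 */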
915
916 int inet_dccp_listen(struct socket *sock, int backlog)
917 {
918         struct sock *sk = sock->sk;
919         unsigned char old_state;
920         int err;
921
922         lock_sock(sk);
923
924         err = -EINVAL;
925         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
926                 goto out;
927
928         old_state = sk->sk_state;
929         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
930                 goto out;
931
932         WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
933         /* Really, if the socket is already in listen state
934          * we can only allow the backlog to be adjusted.
935          */
936         if (old_state != DCCP_LISTEN) {
937                 struct dccp_sock *dp = dccp_sk(sk);
938
939                 dp->dccps_role = DCCP_ROLE_LISTEN;
940
941                 /* do not start to listen if feature negotiation setup fails */
942                 if (dccp_feat_finalise_settings(dp)) {
943                         err = -EPROTO;
944                         goto out;
945                 }
946
947                 err = inet_csk_listen_start(sk);
948                 if (err)
949                         goto out;
950         }
951         err = 0;
952
953 out:
954         release_sock(sk);
955         return err;
956 }
957
958 EXPORT_SYMBOL_GPL(inet_dccp_listen);
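/*
 * Editorial sketch (userspace): a minimal passive DCCP endpoint. The
 * service code must match what connecting clients request (incoming
 * Requests are checked against dccps_service/the service list); the
 * service value 42 and port 4242 are purely illustrative.
 *
 *	#include <linux/dccp.h>
 *	#include <netinet/in.h>
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__be32 service = htonl(42);
 *	struct sockaddr_in sa = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(4242),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(fd, 16);
 */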
959
960 static void dccp_terminate_connection(struct sock *sk)
961 {
962         u8 next_state = DCCP_CLOSED;
963
964         switch (sk->sk_state) {
965         case DCCP_PASSIVE_CLOSE:
966         case DCCP_PASSIVE_CLOSEREQ:
967                 dccp_finish_passive_close(sk);
968                 break;
969         case DCCP_PARTOPEN:
970                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
971                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
972                 fallthrough;
973         case DCCP_OPEN:
974                 dccp_send_close(sk, 1);
975
976                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
977                     !dccp_sk(sk)->dccps_server_timewait)
978                         next_state = DCCP_ACTIVE_CLOSEREQ;
979                 else
980                         next_state = DCCP_CLOSING;
981                 fallthrough;
982         default:
983                 dccp_set_state(sk, next_state);
984         }
985 }
986
987 void dccp_close(struct sock *sk, long timeout)
988 {
989         struct dccp_sock *dp = dccp_sk(sk);
990         struct sk_buff *skb;
991         u32 data_was_unread = 0;
992         int state;
993
994         lock_sock(sk);
995
996         sk->sk_shutdown = SHUTDOWN_MASK;
997
998         if (sk->sk_state == DCCP_LISTEN) {
999                 dccp_set_state(sk, DCCP_CLOSED);
1000
1001                 /* Special case. */
1002                 inet_csk_listen_stop(sk);
1003
1004                 goto adjudge_to_death;
1005         }
1006
1007         sk_stop_timer(sk, &dp->dccps_xmit_timer);
1008
1009         /*
1010          * We need to flush the recv. buffs.  We do this only on the
1011          * descriptor close, not protocol-sourced closes, because the
1012          * reader process may not have drained the data yet!
1013          */
1014         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1015                 data_was_unread += skb->len;
1016                 __kfree_skb(skb);
1017         }
1018
1019         /* If socket has been already reset kill it. */
1020         if (sk->sk_state == DCCP_CLOSED)
1021                 goto adjudge_to_death;
1022
1023         if (data_was_unread) {
1024                 /* Unread data was tossed, send an appropriate Reset Code */
1025                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1026                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1027                 dccp_set_state(sk, DCCP_CLOSED);
1028         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1029                 /* Check zero linger _after_ checking for unread data. */
1030                 sk->sk_prot->disconnect(sk, 0);
1031         } else if (sk->sk_state != DCCP_CLOSED) {
1032                 /*
1033                  * Normal connection termination. May need to wait if there are
1034                  * still packets in the TX queue that are delayed by the CCID.
1035                  */
1036                 dccp_flush_write_queue(sk, &timeout);
1037                 dccp_terminate_connection(sk);
1038         }
1039
1040         /*
1041          * Flush write queue. This may be necessary in several cases:
1042          * - we have been closed by the peer but still have application data;
1043          * - abortive termination (unread data or zero linger time);
1044          * - normal termination, but the queue could not be flushed within the time limit.
1045          */
1046         __skb_queue_purge(&sk->sk_write_queue);
1047
1048         sk_stream_wait_close(sk, timeout);
1049
1050 adjudge_to_death:
1051         state = sk->sk_state;
1052         sock_hold(sk);
1053         sock_orphan(sk);
1054
1055         /*
1056          * It is the last release_sock in its life. It will remove backlog.
1057          */
1058         release_sock(sk);
1059         /*
1060          * Now socket is owned by kernel and we acquire BH lock
1061          * to finish close. No need to check for user refs.
1062          */
1063         local_bh_disable();
1064         bh_lock_sock(sk);
1065         WARN_ON(sock_owned_by_user(sk));
1066
1067         this_cpu_inc(dccp_orphan_count);
1068
1069         /* Have we already been destroyed by a softirq or backlog? */
1070         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1071                 goto out;
1072
1073         if (sk->sk_state == DCCP_CLOSED)
1074                 inet_csk_destroy_sock(sk);
1075
1076         /* Otherwise, socket is reprieved until protocol close. */
1077
1078 out:
1079         bh_unlock_sock(sk);
1080         local_bh_enable();
1081         sock_put(sk);
1082 }
1083
1084 EXPORT_SYMBOL_GPL(dccp_close);
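/*
 * Editorial sketch (userspace): with SO_LINGER enabled and a zero
 * linger time, close() takes the disconnect() branch above and aborts
 * the connection with a Reset instead of performing the normal Close
 * handshake.
 *
 *	struct linger lg = { .l_onoff = 1, .l_linger = 0 };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
 *	close(fd);
 */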
1085
1086 void dccp_shutdown(struct sock *sk, int how)
1087 {
1088         dccp_pr_debug("called shutdown(%x)\n", how);
1089 }
1090
1091 EXPORT_SYMBOL_GPL(dccp_shutdown);
1092
1093 static inline int __init dccp_mib_init(void)
1094 {
1095         dccp_statistics = alloc_percpu(struct dccp_mib);
1096         if (!dccp_statistics)
1097                 return -ENOMEM;
1098         return 0;
1099 }
1100
1101 static inline void dccp_mib_exit(void)
1102 {
1103         free_percpu(dccp_statistics);
1104 }
1105
1106 static int thash_entries;
1107 module_param(thash_entries, int, 0444);
1108 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1109
1110 #ifdef CONFIG_IP_DCCP_DEBUG
1111 bool dccp_debug;
1112 module_param(dccp_debug, bool, 0644);
1113 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1114
1115 EXPORT_SYMBOL_GPL(dccp_debug);
1116 #endif
1117
1118 static int __init dccp_init(void)
1119 {
1120         unsigned long goal;
1121         unsigned long nr_pages = totalram_pages();
1122         int ehash_order, bhash_order, i;
1123         int rc;
1124
1125         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1126                      sizeof_field(struct sk_buff, cb));
1127         rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
1128         if (rc)
1129                 goto out_fail;
1130         rc = -ENOBUFS;
1131         dccp_hashinfo.bind_bucket_cachep =
1132                 kmem_cache_create("dccp_bind_bucket",
1133                                   sizeof(struct inet_bind_bucket), 0,
1134                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1135         if (!dccp_hashinfo.bind_bucket_cachep)
1136                 goto out_free_hashinfo2;
1137         dccp_hashinfo.bind2_bucket_cachep =
1138                 kmem_cache_create("dccp_bind2_bucket",
1139                                   sizeof(struct inet_bind2_bucket), 0,
1140                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1141         if (!dccp_hashinfo.bind2_bucket_cachep)
1142                 goto out_free_bind_bucket_cachep;
1143
1144         /*
1145          * Size and allocate the main established and bind bucket
1146          * hash tables.
1147          *
1148          * The methodology is similar to that of the buffer cache.
1149          */
1150         if (nr_pages >= (128 * 1024))
1151                 goal = nr_pages >> (21 - PAGE_SHIFT);
1152         else
1153                 goal = nr_pages >> (23 - PAGE_SHIFT);
1154
1155         if (thash_entries)
1156                 goal = (thash_entries *
1157                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1158         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1159                 ;
1160         do {
1161                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1162                                         sizeof(struct inet_ehash_bucket);
1163
1164                 while (hash_size & (hash_size - 1))
1165                         hash_size--;
1166                 dccp_hashinfo.ehash_mask = hash_size - 1;
1167                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1168                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1169         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1170
1171         if (!dccp_hashinfo.ehash) {
1172                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1173                 goto out_free_bind2_bucket_cachep;
1174         }
1175
1176         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1177                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1178
1179         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1180                 goto out_free_dccp_ehash;
1181
1182         bhash_order = ehash_order;
1183
1184         do {
1185                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1186                                         sizeof(struct inet_bind_hashbucket);
1187                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1188                     bhash_order > 0)
1189                         continue;
1190                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1191                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1192         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1193
1194         if (!dccp_hashinfo.bhash) {
1195                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1196                 goto out_free_dccp_locks;
1197         }
1198
1199         dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
1200                 __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
1201
1202         if (!dccp_hashinfo.bhash2) {
1203                 DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
1204                 goto out_free_dccp_bhash;
1205         }
1206
1207         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1208                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1209                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1210                 spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
1211                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
1212         }
1213
1214         dccp_hashinfo.pernet = false;
1215
1216         rc = dccp_mib_init();
1217         if (rc)
1218                 goto out_free_dccp_bhash2;
1219
1220         rc = dccp_ackvec_init();
1221         if (rc)
1222                 goto out_free_dccp_mib;
1223
1224         rc = dccp_sysctl_init();
1225         if (rc)
1226                 goto out_ackvec_exit;
1227
1228         rc = ccid_initialize_builtins();
1229         if (rc)
1230                 goto out_sysctl_exit;
1231
1232         dccp_timestamping_init();
1233
1234         return 0;
1235
1236 out_sysctl_exit:
1237         dccp_sysctl_exit();
1238 out_ackvec_exit:
1239         dccp_ackvec_exit();
1240 out_free_dccp_mib:
1241         dccp_mib_exit();
1242 out_free_dccp_bhash2:
1243         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1244 out_free_dccp_bhash:
1245         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1246 out_free_dccp_locks:
1247         inet_ehash_locks_free(&dccp_hashinfo);
1248 out_free_dccp_ehash:
1249         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1250 out_free_bind2_bucket_cachep:
1251         kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
1252 out_free_bind_bucket_cachep:
1253         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1254 out_free_hashinfo2:
1255         inet_hashinfo2_free_mod(&dccp_hashinfo);
1256 out_fail:
1257         dccp_hashinfo.bhash = NULL;
1258         dccp_hashinfo.bhash2 = NULL;
1259         dccp_hashinfo.ehash = NULL;
1260         dccp_hashinfo.bind_bucket_cachep = NULL;
1261         dccp_hashinfo.bind2_bucket_cachep = NULL;
1262         return rc;
1263 }
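/*
 * Worked example of the ehash sizing above (editorial note), assuming
 * 4 KiB pages and an 8-byte struct inet_ehash_bucket: with 1 GiB of
 * RAM, nr_pages = 262144 >= 128 * 1024, so
 * goal = 262144 >> (21 - 12) = 512 pages and ehash_order = 9. Then
 * hash_size = 512 * 4096 / 8 = 262144 slots, already a power of two,
 * giving ehash_mask = 262143.
 */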
1264
1265 static void __exit dccp_fini(void)
1266 {
1267         int bhash_order = get_order(dccp_hashinfo.bhash_size *
1268                                     sizeof(struct inet_bind_hashbucket));
1269
1270         ccid_cleanup_builtins();
1271         dccp_mib_exit();
1272         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1273         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1274         free_pages((unsigned long)dccp_hashinfo.ehash,
1275                    get_order((dccp_hashinfo.ehash_mask + 1) *
1276                              sizeof(struct inet_ehash_bucket)));
1277         inet_ehash_locks_free(&dccp_hashinfo);
1278         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1279         dccp_ackvec_exit();
1280         dccp_sysctl_exit();
1281         inet_hashinfo2_free_mod(&dccp_hashinfo);
1282 }
1283
1284 module_init(dccp_init);
1285 module_exit(dccp_fini);
1286
1287 MODULE_LICENSE("GPL");
1288 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1289 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");