bpf: Factor out socket lookup functions for the TC hookpoint.
[platform/kernel/linux-starfive.git] / net / dccp / proto.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  net/dccp/proto.c
4  *
5  *  An implementation of the DCCP protocol
6  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7  */
8
9 #include <linux/dccp.h>
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/if_arp.h>
18 #include <linux/init.h>
19 #include <linux/random.h>
20 #include <linux/slab.h>
21 #include <net/checksum.h>
22
23 #include <net/inet_sock.h>
24 #include <net/inet_common.h>
25 #include <net/sock.h>
26 #include <net/xfrm.h>
27
28 #include <asm/ioctls.h>
29 #include <linux/spinlock.h>
30 #include <linux/timer.h>
31 #include <linux/delay.h>
32 #include <linux/poll.h>
33
34 #include "ccid.h"
35 #include "dccp.h"
36 #include "feat.h"
37
38 #define CREATE_TRACE_POINTS
39 #include "trace.h"
40
/* DCCP MIB counters (struct dccp_mib); exported to other GPL modules. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Per-CPU counter, exported to other GPL modules.
 * NOTE(review): by its name this counts orphaned DCCP sockets; it is not
 * updated anywhere in this part of the file - confirm against its users.
 */
DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);

/*
 * inet hash tables instance for DCCP, exported to other GPL modules.
 * NOTE(review): initialisation is not visible in this part of the file.
 */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * the maximum queue length for tx in packets. 0 is no limit
 * (copied into dccps_tx_qlen of every new socket by dccp_init_sock())
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
55 static const char *dccp_state_name(const int state)
56 {
57         static const char *const dccp_state_names[] = {
58         [DCCP_OPEN]             = "OPEN",
59         [DCCP_REQUESTING]       = "REQUESTING",
60         [DCCP_PARTOPEN]         = "PARTOPEN",
61         [DCCP_LISTEN]           = "LISTEN",
62         [DCCP_RESPOND]          = "RESPOND",
63         [DCCP_CLOSING]          = "CLOSING",
64         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
65         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
66         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67         [DCCP_TIME_WAIT]        = "TIME_WAIT",
68         [DCCP_CLOSED]           = "CLOSED",
69         };
70
71         if (state >= DCCP_MAX_STATES)
72                 return "INVALID STATE!";
73         else
74                 return dccp_state_names[state];
75 }
76 #endif
77
78 void dccp_set_state(struct sock *sk, const int state)
79 {
80         const int oldstate = sk->sk_state;
81
82         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
83                       dccp_state_name(oldstate), dccp_state_name(state));
84         WARN_ON(state == oldstate);
85
86         switch (state) {
87         case DCCP_OPEN:
88                 if (oldstate != DCCP_OPEN)
89                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90                 /* Client retransmits all Confirm options until entering OPEN */
91                 if (oldstate == DCCP_PARTOPEN)
92                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93                 break;
94
95         case DCCP_CLOSED:
96                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97                     oldstate == DCCP_CLOSING)
98                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100                 sk->sk_prot->unhash(sk);
101                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103                         inet_put_port(sk);
104                 fallthrough;
105         default:
106                 if (oldstate == DCCP_OPEN)
107                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108         }
109
110         /* Change state AFTER socket is unhashed to avoid closed
111          * socket sitting in hash tables.
112          */
113         inet_sk_set_state(sk, state);
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 void dccp_destruct_common(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180 }
181 EXPORT_SYMBOL_GPL(dccp_destruct_common);
182
/*
 * sk_destruct callback installed by dccp_init_sock(): runs the DCCP-specific
 * teardown first, then the generic inet socket destructor.
 */
static void dccp_sk_destruct(struct sock *sk)
{
	dccp_destruct_common(sk);
	inet_sock_destruct(sk);
}
188
189 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
190 {
191         struct dccp_sock *dp = dccp_sk(sk);
192         struct inet_connection_sock *icsk = inet_csk(sk);
193
194         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
195         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
196         sk->sk_state            = DCCP_CLOSED;
197         sk->sk_write_space      = dccp_write_space;
198         sk->sk_destruct         = dccp_sk_destruct;
199         icsk->icsk_sync_mss     = dccp_sync_mss;
200         dp->dccps_mss_cache     = 536;
201         dp->dccps_rate_last     = jiffies;
202         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
203         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
204         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
205
206         dccp_init_xmit_timers(sk);
207
208         INIT_LIST_HEAD(&dp->dccps_featneg);
209         /* control socket doesn't need feat nego */
210         if (likely(ctl_sock_initialized))
211                 return dccp_feat_init(sk);
212         return 0;
213 }
214
215 EXPORT_SYMBOL_GPL(dccp_init_sock);
216
217 void dccp_destroy_sock(struct sock *sk)
218 {
219         struct dccp_sock *dp = dccp_sk(sk);
220
221         __skb_queue_purge(&sk->sk_write_queue);
222         if (sk->sk_send_head != NULL) {
223                 kfree_skb(sk->sk_send_head);
224                 sk->sk_send_head = NULL;
225         }
226
227         /* Clean up a referenced DCCP bind bucket. */
228         if (inet_csk(sk)->icsk_bind_hash != NULL)
229                 inet_put_port(sk);
230
231         kfree(dp->dccps_service_list);
232         dp->dccps_service_list = NULL;
233
234         if (dp->dccps_hc_rx_ackvec != NULL) {
235                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
236                 dp->dccps_hc_rx_ackvec = NULL;
237         }
238         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
239         dp->dccps_hc_rx_ccid = NULL;
240
241         /* clean up feature negotiation state */
242         dccp_feat_list_purge(&dp->dccps_featneg);
243 }
244
245 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
246
247 static inline int dccp_need_reset(int state)
248 {
249         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
250                state != DCCP_REQUESTING;
251 }
252
253 int dccp_disconnect(struct sock *sk, int flags)
254 {
255         struct inet_connection_sock *icsk = inet_csk(sk);
256         struct inet_sock *inet = inet_sk(sk);
257         struct dccp_sock *dp = dccp_sk(sk);
258         const int old_state = sk->sk_state;
259
260         if (old_state != DCCP_CLOSED)
261                 dccp_set_state(sk, DCCP_CLOSED);
262
263         /*
264          * This corresponds to the ABORT function of RFC793, sec. 3.8
265          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
266          */
267         if (old_state == DCCP_LISTEN) {
268                 inet_csk_listen_stop(sk);
269         } else if (dccp_need_reset(old_state)) {
270                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
271                 sk->sk_err = ECONNRESET;
272         } else if (old_state == DCCP_REQUESTING)
273                 sk->sk_err = ECONNRESET;
274
275         dccp_clear_xmit_timers(sk);
276         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
277         dp->dccps_hc_rx_ccid = NULL;
278
279         __skb_queue_purge(&sk->sk_receive_queue);
280         __skb_queue_purge(&sk->sk_write_queue);
281         if (sk->sk_send_head != NULL) {
282                 __kfree_skb(sk->sk_send_head);
283                 sk->sk_send_head = NULL;
284         }
285
286         inet->inet_dport = 0;
287
288         inet_bhash2_reset_saddr(sk);
289
290         sk->sk_shutdown = 0;
291         sock_reset_flag(sk, SOCK_DONE);
292
293         icsk->icsk_backoff = 0;
294         inet_csk_delack_init(sk);
295         __sk_dst_reset(sk);
296
297         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
298
299         sk_error_report(sk);
300         return 0;
301 }
302
303 EXPORT_SYMBOL_GPL(dccp_disconnect);
304
305 /*
306  *      Wait for a DCCP event.
307  *
308  *      Note that we don't need to lock the socket, as the upper poll layers
309  *      take care of normal races (between the test and the event) and we don't
310  *      go look at any of the socket buffers directly.
311  */
312 __poll_t dccp_poll(struct file *file, struct socket *sock,
313                        poll_table *wait)
314 {
315         __poll_t mask;
316         struct sock *sk = sock->sk;
317
318         sock_poll_wait(file, sock, wait);
319         if (sk->sk_state == DCCP_LISTEN)
320                 return inet_csk_listen_poll(sk);
321
322         /* Socket is not locked. We are protected from async events
323            by poll logic and correct handling of state changes
324            made by another threads is impossible in any case.
325          */
326
327         mask = 0;
328         if (sk->sk_err)
329                 mask = EPOLLERR;
330
331         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
332                 mask |= EPOLLHUP;
333         if (sk->sk_shutdown & RCV_SHUTDOWN)
334                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
335
336         /* Connected? */
337         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
338                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
339                         mask |= EPOLLIN | EPOLLRDNORM;
340
341                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
342                         if (sk_stream_is_writeable(sk)) {
343                                 mask |= EPOLLOUT | EPOLLWRNORM;
344                         } else {  /* send SIGIO later */
345                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
346                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
347
348                                 /* Race breaker. If space is freed after
349                                  * wspace test but before the flags are set,
350                                  * IO signal will be lost.
351                                  */
352                                 if (sk_stream_is_writeable(sk))
353                                         mask |= EPOLLOUT | EPOLLWRNORM;
354                         }
355                 }
356         }
357         return mask;
358 }
359
360 EXPORT_SYMBOL_GPL(dccp_poll);
361
362 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
363 {
364         int rc = -ENOTCONN;
365
366         lock_sock(sk);
367
368         if (sk->sk_state == DCCP_LISTEN)
369                 goto out;
370
371         switch (cmd) {
372         case SIOCOUTQ: {
373                 int amount = sk_wmem_alloc_get(sk);
374                 /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
375                  * always 0, comparably to UDP.
376                  */
377
378                 rc = put_user(amount, (int __user *)arg);
379         }
380                 break;
381         case SIOCINQ: {
382                 struct sk_buff *skb;
383                 unsigned long amount = 0;
384
385                 skb = skb_peek(&sk->sk_receive_queue);
386                 if (skb != NULL) {
387                         /*
388                          * We will only return the amount of this packet since
389                          * that is all that will be read.
390                          */
391                         amount = skb->len;
392                 }
393                 rc = put_user(amount, (int __user *)arg);
394         }
395                 break;
396         default:
397                 rc = -ENOIOCTLCMD;
398                 break;
399         }
400 out:
401         release_sock(sk);
402         return rc;
403 }
404
405 EXPORT_SYMBOL_GPL(dccp_ioctl);
406
407 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
408                                    sockptr_t optval, unsigned int optlen)
409 {
410         struct dccp_sock *dp = dccp_sk(sk);
411         struct dccp_service_list *sl = NULL;
412
413         if (service == DCCP_SERVICE_INVALID_VALUE ||
414             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
415                 return -EINVAL;
416
417         if (optlen > sizeof(service)) {
418                 sl = kmalloc(optlen, GFP_KERNEL);
419                 if (sl == NULL)
420                         return -ENOMEM;
421
422                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
423                 if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
424                                 sizeof(service), optlen - sizeof(service)) ||
425                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
426                         kfree(sl);
427                         return -EFAULT;
428                 }
429         }
430
431         lock_sock(sk);
432         dp->dccps_service = service;
433
434         kfree(dp->dccps_service_list);
435
436         dp->dccps_service_list = sl;
437         release_sock(sk);
438         return 0;
439 }
440
441 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
442 {
443         u8 *list, len;
444         int i, rc;
445
446         if (cscov < 0 || cscov > 15)
447                 return -EINVAL;
448         /*
449          * Populate a list of permissible values, in the range cscov...15. This
450          * is necessary since feature negotiation of single values only works if
451          * both sides incidentally choose the same value. Since the list starts
452          * lowest-value first, negotiation will pick the smallest shared value.
453          */
454         if (cscov == 0)
455                 return 0;
456         len = 16 - cscov;
457
458         list = kmalloc(len, GFP_KERNEL);
459         if (list == NULL)
460                 return -ENOBUFS;
461
462         for (i = 0; i < len; i++)
463                 list[i] = cscov++;
464
465         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
466
467         if (rc == 0) {
468                 if (rx)
469                         dccp_sk(sk)->dccps_pcrlen = cscov;
470                 else
471                         dccp_sk(sk)->dccps_pcslen = cscov;
472         }
473         kfree(list);
474         return rc;
475 }
476
477 static int dccp_setsockopt_ccid(struct sock *sk, int type,
478                                 sockptr_t optval, unsigned int optlen)
479 {
480         u8 *val;
481         int rc = 0;
482
483         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
484                 return -EINVAL;
485
486         val = memdup_sockptr(optval, optlen);
487         if (IS_ERR(val))
488                 return PTR_ERR(val);
489
490         lock_sock(sk);
491         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
492                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
493
494         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
495                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
496         release_sock(sk);
497
498         kfree(val);
499         return rc;
500 }
501
502 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
503                 sockptr_t optval, unsigned int optlen)
504 {
505         struct dccp_sock *dp = dccp_sk(sk);
506         int val, err = 0;
507
508         switch (optname) {
509         case DCCP_SOCKOPT_PACKET_SIZE:
510                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
511                 return 0;
512         case DCCP_SOCKOPT_CHANGE_L:
513         case DCCP_SOCKOPT_CHANGE_R:
514                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
515                 return 0;
516         case DCCP_SOCKOPT_CCID:
517         case DCCP_SOCKOPT_RX_CCID:
518         case DCCP_SOCKOPT_TX_CCID:
519                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
520         }
521
522         if (optlen < (int)sizeof(int))
523                 return -EINVAL;
524
525         if (copy_from_sockptr(&val, optval, sizeof(int)))
526                 return -EFAULT;
527
528         if (optname == DCCP_SOCKOPT_SERVICE)
529                 return dccp_setsockopt_service(sk, val, optval, optlen);
530
531         lock_sock(sk);
532         switch (optname) {
533         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
534                 if (dp->dccps_role != DCCP_ROLE_SERVER)
535                         err = -EOPNOTSUPP;
536                 else
537                         dp->dccps_server_timewait = (val != 0);
538                 break;
539         case DCCP_SOCKOPT_SEND_CSCOV:
540                 err = dccp_setsockopt_cscov(sk, val, false);
541                 break;
542         case DCCP_SOCKOPT_RECV_CSCOV:
543                 err = dccp_setsockopt_cscov(sk, val, true);
544                 break;
545         case DCCP_SOCKOPT_QPOLICY_ID:
546                 if (sk->sk_state != DCCP_CLOSED)
547                         err = -EISCONN;
548                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
549                         err = -EINVAL;
550                 else
551                         dp->dccps_qpolicy = val;
552                 break;
553         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
554                 if (val < 0)
555                         err = -EINVAL;
556                 else
557                         dp->dccps_tx_qlen = val;
558                 break;
559         default:
560                 err = -ENOPROTOOPT;
561                 break;
562         }
563         release_sock(sk);
564
565         return err;
566 }
567
568 int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
569                     unsigned int optlen)
570 {
571         if (level != SOL_DCCP)
572                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
573                                                              optname, optval,
574                                                              optlen);
575         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
576 }
577
578 EXPORT_SYMBOL_GPL(dccp_setsockopt);
579
580 static int dccp_getsockopt_service(struct sock *sk, int len,
581                                    __be32 __user *optval,
582                                    int __user *optlen)
583 {
584         const struct dccp_sock *dp = dccp_sk(sk);
585         const struct dccp_service_list *sl;
586         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
587
588         lock_sock(sk);
589         if ((sl = dp->dccps_service_list) != NULL) {
590                 slen = sl->dccpsl_nr * sizeof(u32);
591                 total_len += slen;
592         }
593
594         err = -EINVAL;
595         if (total_len > len)
596                 goto out;
597
598         err = 0;
599         if (put_user(total_len, optlen) ||
600             put_user(dp->dccps_service, optval) ||
601             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
602                 err = -EFAULT;
603 out:
604         release_sock(sk);
605         return err;
606 }
607
608 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
609                     char __user *optval, int __user *optlen)
610 {
611         struct dccp_sock *dp;
612         int val, len;
613
614         if (get_user(len, optlen))
615                 return -EFAULT;
616
617         if (len < (int)sizeof(int))
618                 return -EINVAL;
619
620         dp = dccp_sk(sk);
621
622         switch (optname) {
623         case DCCP_SOCKOPT_PACKET_SIZE:
624                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
625                 return 0;
626         case DCCP_SOCKOPT_SERVICE:
627                 return dccp_getsockopt_service(sk, len,
628                                                (__be32 __user *)optval, optlen);
629         case DCCP_SOCKOPT_GET_CUR_MPS:
630                 val = dp->dccps_mss_cache;
631                 break;
632         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
633                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
634         case DCCP_SOCKOPT_TX_CCID:
635                 val = ccid_get_current_tx_ccid(dp);
636                 if (val < 0)
637                         return -ENOPROTOOPT;
638                 break;
639         case DCCP_SOCKOPT_RX_CCID:
640                 val = ccid_get_current_rx_ccid(dp);
641                 if (val < 0)
642                         return -ENOPROTOOPT;
643                 break;
644         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
645                 val = dp->dccps_server_timewait;
646                 break;
647         case DCCP_SOCKOPT_SEND_CSCOV:
648                 val = dp->dccps_pcslen;
649                 break;
650         case DCCP_SOCKOPT_RECV_CSCOV:
651                 val = dp->dccps_pcrlen;
652                 break;
653         case DCCP_SOCKOPT_QPOLICY_ID:
654                 val = dp->dccps_qpolicy;
655                 break;
656         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
657                 val = dp->dccps_tx_qlen;
658                 break;
659         case 128 ... 191:
660                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
661                                              len, (u32 __user *)optval, optlen);
662         case 192 ... 255:
663                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
664                                              len, (u32 __user *)optval, optlen);
665         default:
666                 return -ENOPROTOOPT;
667         }
668
669         len = sizeof(val);
670         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
671                 return -EFAULT;
672
673         return 0;
674 }
675
676 int dccp_getsockopt(struct sock *sk, int level, int optname,
677                     char __user *optval, int __user *optlen)
678 {
679         if (level != SOL_DCCP)
680                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
681                                                              optname, optval,
682                                                              optlen);
683         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
684 }
685
686 EXPORT_SYMBOL_GPL(dccp_getsockopt);
687
688 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
689 {
690         struct cmsghdr *cmsg;
691
692         /*
693          * Assign an (opaque) qpolicy priority value to skb->priority.
694          *
695          * We are overloading this skb field for use with the qpolicy subystem.
696          * The skb->priority is normally used for the SO_PRIORITY option, which
697          * is initialised from sk_priority. Since the assignment of sk_priority
698          * to skb->priority happens later (on layer 3), we overload this field
699          * for use with queueing priorities as long as the skb is on layer 4.
700          * The default priority value (if nothing is set) is 0.
701          */
702         skb->priority = 0;
703
704         for_each_cmsghdr(cmsg, msg) {
705                 if (!CMSG_OK(msg, cmsg))
706                         return -EINVAL;
707
708                 if (cmsg->cmsg_level != SOL_DCCP)
709                         continue;
710
711                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
712                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
713                         return -EINVAL;
714
715                 switch (cmsg->cmsg_type) {
716                 case DCCP_SCM_PRIORITY:
717                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
718                                 return -EINVAL;
719                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
720                         break;
721                 default:
722                         return -EINVAL;
723                 }
724         }
725         return 0;
726 }
727
728 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
729 {
730         const struct dccp_sock *dp = dccp_sk(sk);
731         const int flags = msg->msg_flags;
732         const int noblock = flags & MSG_DONTWAIT;
733         struct sk_buff *skb;
734         int rc, size;
735         long timeo;
736
737         trace_dccp_probe(sk, len);
738
739         if (len > dp->dccps_mss_cache)
740                 return -EMSGSIZE;
741
742         lock_sock(sk);
743
744         timeo = sock_sndtimeo(sk, noblock);
745
746         /*
747          * We have to use sk_stream_wait_connect here to set sk_write_pending,
748          * so that the trick in dccp_rcv_request_sent_state_process.
749          */
750         /* Wait for a connection to finish. */
751         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
752                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
753                         goto out_release;
754
755         size = sk->sk_prot->max_header + len;
756         release_sock(sk);
757         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
758         lock_sock(sk);
759         if (skb == NULL)
760                 goto out_release;
761
762         if (dccp_qpolicy_full(sk)) {
763                 rc = -EAGAIN;
764                 goto out_discard;
765         }
766
767         if (sk->sk_state == DCCP_CLOSED) {
768                 rc = -ENOTCONN;
769                 goto out_discard;
770         }
771
772         skb_reserve(skb, sk->sk_prot->max_header);
773         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
774         if (rc != 0)
775                 goto out_discard;
776
777         rc = dccp_msghdr_parse(msg, skb);
778         if (rc != 0)
779                 goto out_discard;
780
781         dccp_qpolicy_push(sk, skb);
782         /*
783          * The xmit_timer is set if the TX CCID is rate-based and will expire
784          * when congestion control permits to release further packets into the
785          * network. Window-based CCIDs do not use this timer.
786          */
787         if (!timer_pending(&dp->dccps_xmit_timer))
788                 dccp_write_xmit(sk);
789 out_release:
790         release_sock(sk);
791         return rc ? : len;
792 out_discard:
793         kfree_skb(skb);
794         goto out_release;
795 }
796
797 EXPORT_SYMBOL_GPL(dccp_sendmsg);
798
799 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
800                  int *addr_len)
801 {
802         const struct dccp_hdr *dh;
803         long timeo;
804
805         lock_sock(sk);
806
807         if (sk->sk_state == DCCP_LISTEN) {
808                 len = -ENOTCONN;
809                 goto out;
810         }
811
812         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
813
814         do {
815                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
816
817                 if (skb == NULL)
818                         goto verify_sock_status;
819
820                 dh = dccp_hdr(skb);
821
822                 switch (dh->dccph_type) {
823                 case DCCP_PKT_DATA:
824                 case DCCP_PKT_DATAACK:
825                         goto found_ok_skb;
826
827                 case DCCP_PKT_CLOSE:
828                 case DCCP_PKT_CLOSEREQ:
829                         if (!(flags & MSG_PEEK))
830                                 dccp_finish_passive_close(sk);
831                         fallthrough;
832                 case DCCP_PKT_RESET:
833                         dccp_pr_debug("found fin (%s) ok!\n",
834                                       dccp_packet_name(dh->dccph_type));
835                         len = 0;
836                         goto found_fin_ok;
837                 default:
838                         dccp_pr_debug("packet_type=%s\n",
839                                       dccp_packet_name(dh->dccph_type));
840                         sk_eat_skb(sk, skb);
841                 }
842 verify_sock_status:
843                 if (sock_flag(sk, SOCK_DONE)) {
844                         len = 0;
845                         break;
846                 }
847
848                 if (sk->sk_err) {
849                         len = sock_error(sk);
850                         break;
851                 }
852
853                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
854                         len = 0;
855                         break;
856                 }
857
858                 if (sk->sk_state == DCCP_CLOSED) {
859                         if (!sock_flag(sk, SOCK_DONE)) {
860                                 /* This occurs when user tries to read
861                                  * from never connected socket.
862                                  */
863                                 len = -ENOTCONN;
864                                 break;
865                         }
866                         len = 0;
867                         break;
868                 }
869
870                 if (!timeo) {
871                         len = -EAGAIN;
872                         break;
873                 }
874
875                 if (signal_pending(current)) {
876                         len = sock_intr_errno(timeo);
877                         break;
878                 }
879
880                 sk_wait_data(sk, &timeo, NULL);
881                 continue;
882         found_ok_skb:
883                 if (len > skb->len)
884                         len = skb->len;
885                 else if (len < skb->len)
886                         msg->msg_flags |= MSG_TRUNC;
887
888                 if (skb_copy_datagram_msg(skb, 0, msg, len)) {
889                         /* Exception. Bailout! */
890                         len = -EFAULT;
891                         break;
892                 }
893                 if (flags & MSG_TRUNC)
894                         len = skb->len;
895         found_fin_ok:
896                 if (!(flags & MSG_PEEK))
897                         sk_eat_skb(sk, skb);
898                 break;
899         } while (1);
900 out:
901         release_sock(sk);
902         return len;
903 }
904
905 EXPORT_SYMBOL_GPL(dccp_recvmsg);
906
907 int inet_dccp_listen(struct socket *sock, int backlog)
908 {
909         struct sock *sk = sock->sk;
910         unsigned char old_state;
911         int err;
912
913         lock_sock(sk);
914
915         err = -EINVAL;
916         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
917                 goto out;
918
919         old_state = sk->sk_state;
920         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
921                 goto out;
922
923         WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
924         /* Really, if the socket is already in listen state
925          * we can only allow the backlog to be adjusted.
926          */
927         if (old_state != DCCP_LISTEN) {
928                 struct dccp_sock *dp = dccp_sk(sk);
929
930                 dp->dccps_role = DCCP_ROLE_LISTEN;
931
932                 /* do not start to listen if feature negotiation setup fails */
933                 if (dccp_feat_finalise_settings(dp)) {
934                         err = -EPROTO;
935                         goto out;
936                 }
937
938                 err = inet_csk_listen_start(sk);
939                 if (err)
940                         goto out;
941         }
942         err = 0;
943
944 out:
945         release_sock(sk);
946         return err;
947 }
948
949 EXPORT_SYMBOL_GPL(inet_dccp_listen);
950
951 static void dccp_terminate_connection(struct sock *sk)
952 {
953         u8 next_state = DCCP_CLOSED;
954
955         switch (sk->sk_state) {
956         case DCCP_PASSIVE_CLOSE:
957         case DCCP_PASSIVE_CLOSEREQ:
958                 dccp_finish_passive_close(sk);
959                 break;
960         case DCCP_PARTOPEN:
961                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
962                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
963                 fallthrough;
964         case DCCP_OPEN:
965                 dccp_send_close(sk, 1);
966
967                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
968                     !dccp_sk(sk)->dccps_server_timewait)
969                         next_state = DCCP_ACTIVE_CLOSEREQ;
970                 else
971                         next_state = DCCP_CLOSING;
972                 fallthrough;
973         default:
974                 dccp_set_state(sk, next_state);
975         }
976 }
977
978 void dccp_close(struct sock *sk, long timeout)
979 {
980         struct dccp_sock *dp = dccp_sk(sk);
981         struct sk_buff *skb;
982         u32 data_was_unread = 0;
983         int state;
984
985         lock_sock(sk);
986
987         sk->sk_shutdown = SHUTDOWN_MASK;
988
989         if (sk->sk_state == DCCP_LISTEN) {
990                 dccp_set_state(sk, DCCP_CLOSED);
991
992                 /* Special case. */
993                 inet_csk_listen_stop(sk);
994
995                 goto adjudge_to_death;
996         }
997
998         sk_stop_timer(sk, &dp->dccps_xmit_timer);
999
1000         /*
1001          * We need to flush the recv. buffs.  We do this only on the
1002          * descriptor close, not protocol-sourced closes, because the
1003           *reader process may not have drained the data yet!
1004          */
1005         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1006                 data_was_unread += skb->len;
1007                 __kfree_skb(skb);
1008         }
1009
1010         /* If socket has been already reset kill it. */
1011         if (sk->sk_state == DCCP_CLOSED)
1012                 goto adjudge_to_death;
1013
1014         if (data_was_unread) {
1015                 /* Unread data was tossed, send an appropriate Reset Code */
1016                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1017                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1018                 dccp_set_state(sk, DCCP_CLOSED);
1019         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1020                 /* Check zero linger _after_ checking for unread data. */
1021                 sk->sk_prot->disconnect(sk, 0);
1022         } else if (sk->sk_state != DCCP_CLOSED) {
1023                 /*
1024                  * Normal connection termination. May need to wait if there are
1025                  * still packets in the TX queue that are delayed by the CCID.
1026                  */
1027                 dccp_flush_write_queue(sk, &timeout);
1028                 dccp_terminate_connection(sk);
1029         }
1030
1031         /*
1032          * Flush write queue. This may be necessary in several cases:
1033          * - we have been closed by the peer but still have application data;
1034          * - abortive termination (unread data or zero linger time),
1035          * - normal termination but queue could not be flushed within time limit
1036          */
1037         __skb_queue_purge(&sk->sk_write_queue);
1038
1039         sk_stream_wait_close(sk, timeout);
1040
1041 adjudge_to_death:
1042         state = sk->sk_state;
1043         sock_hold(sk);
1044         sock_orphan(sk);
1045
1046         /*
1047          * It is the last release_sock in its life. It will remove backlog.
1048          */
1049         release_sock(sk);
1050         /*
1051          * Now socket is owned by kernel and we acquire BH lock
1052          * to finish close. No need to check for user refs.
1053          */
1054         local_bh_disable();
1055         bh_lock_sock(sk);
1056         WARN_ON(sock_owned_by_user(sk));
1057
1058         this_cpu_inc(dccp_orphan_count);
1059
1060         /* Have we already been destroyed by a softirq or backlog? */
1061         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1062                 goto out;
1063
1064         if (sk->sk_state == DCCP_CLOSED)
1065                 inet_csk_destroy_sock(sk);
1066
1067         /* Otherwise, socket is reprieved until protocol close. */
1068
1069 out:
1070         bh_unlock_sock(sk);
1071         local_bh_enable();
1072         sock_put(sk);
1073 }
1074
1075 EXPORT_SYMBOL_GPL(dccp_close);
1076
/*
 * dccp_shutdown - shutdown() handler for DCCP sockets
 * @sk: socket the request was issued on (not used here)
 * @how: shutdown mode requested by the caller
 *
 * Only emits a debug message carrying @how; no socket state is modified
 * by this function.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1083
1084 static inline int __init dccp_mib_init(void)
1085 {
1086         dccp_statistics = alloc_percpu(struct dccp_mib);
1087         if (!dccp_statistics)
1088                 return -ENOMEM;
1089         return 0;
1090 }
1091
/*
 * Release the per-CPU MIB counters allocated by dccp_mib_init().
 * Called from dccp_fini() and from the dccp_init() error path.
 */
static inline void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics);
}
1096
/*
 * Optional sizing override, registered with mode 0444.  When non-zero,
 * dccp_init() derives the established-hash allocation goal from this
 * value instead of from the amount of RAM.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1100
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch, only built with CONFIG_IP_DCCP_DEBUG.  Registered
 * with mode 0644 and exported so that other DCCP modules can reference it.
 * NOTE(review): presumably tested by dccp_pr_debug() - confirm in dccp.h.
 */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1108
1109 static int __init dccp_init(void)
1110 {
1111         unsigned long goal;
1112         unsigned long nr_pages = totalram_pages();
1113         int ehash_order, bhash_order, i;
1114         int rc;
1115
1116         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1117                      sizeof_field(struct sk_buff, cb));
1118         rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
1119         if (rc)
1120                 goto out_fail;
1121         rc = -ENOBUFS;
1122         dccp_hashinfo.bind_bucket_cachep =
1123                 kmem_cache_create("dccp_bind_bucket",
1124                                   sizeof(struct inet_bind_bucket), 0,
1125                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1126         if (!dccp_hashinfo.bind_bucket_cachep)
1127                 goto out_free_hashinfo2;
1128         dccp_hashinfo.bind2_bucket_cachep =
1129                 kmem_cache_create("dccp_bind2_bucket",
1130                                   sizeof(struct inet_bind2_bucket), 0,
1131                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1132         if (!dccp_hashinfo.bind2_bucket_cachep)
1133                 goto out_free_bind_bucket_cachep;
1134
1135         /*
1136          * Size and allocate the main established and bind bucket
1137          * hash tables.
1138          *
1139          * The methodology is similar to that of the buffer cache.
1140          */
1141         if (nr_pages >= (128 * 1024))
1142                 goal = nr_pages >> (21 - PAGE_SHIFT);
1143         else
1144                 goal = nr_pages >> (23 - PAGE_SHIFT);
1145
1146         if (thash_entries)
1147                 goal = (thash_entries *
1148                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1149         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1150                 ;
1151         do {
1152                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1153                                         sizeof(struct inet_ehash_bucket);
1154
1155                 while (hash_size & (hash_size - 1))
1156                         hash_size--;
1157                 dccp_hashinfo.ehash_mask = hash_size - 1;
1158                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1159                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1160         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1161
1162         if (!dccp_hashinfo.ehash) {
1163                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1164                 goto out_free_bind2_bucket_cachep;
1165         }
1166
1167         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1168                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1169
1170         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1171                         goto out_free_dccp_ehash;
1172
1173         bhash_order = ehash_order;
1174
1175         do {
1176                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1177                                         sizeof(struct inet_bind_hashbucket);
1178                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1179                     bhash_order > 0)
1180                         continue;
1181                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1182                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1183         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1184
1185         if (!dccp_hashinfo.bhash) {
1186                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1187                 goto out_free_dccp_locks;
1188         }
1189
1190         dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
1191                 __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
1192
1193         if (!dccp_hashinfo.bhash2) {
1194                 DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
1195                 goto out_free_dccp_bhash;
1196         }
1197
1198         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1199                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1200                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1201                 spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
1202                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
1203         }
1204
1205         dccp_hashinfo.pernet = false;
1206
1207         rc = dccp_mib_init();
1208         if (rc)
1209                 goto out_free_dccp_bhash2;
1210
1211         rc = dccp_ackvec_init();
1212         if (rc)
1213                 goto out_free_dccp_mib;
1214
1215         rc = dccp_sysctl_init();
1216         if (rc)
1217                 goto out_ackvec_exit;
1218
1219         rc = ccid_initialize_builtins();
1220         if (rc)
1221                 goto out_sysctl_exit;
1222
1223         dccp_timestamping_init();
1224
1225         return 0;
1226
1227 out_sysctl_exit:
1228         dccp_sysctl_exit();
1229 out_ackvec_exit:
1230         dccp_ackvec_exit();
1231 out_free_dccp_mib:
1232         dccp_mib_exit();
1233 out_free_dccp_bhash2:
1234         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1235 out_free_dccp_bhash:
1236         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1237 out_free_dccp_locks:
1238         inet_ehash_locks_free(&dccp_hashinfo);
1239 out_free_dccp_ehash:
1240         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1241 out_free_bind2_bucket_cachep:
1242         kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
1243 out_free_bind_bucket_cachep:
1244         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1245 out_free_hashinfo2:
1246         inet_hashinfo2_free_mod(&dccp_hashinfo);
1247 out_fail:
1248         dccp_hashinfo.bhash = NULL;
1249         dccp_hashinfo.bhash2 = NULL;
1250         dccp_hashinfo.ehash = NULL;
1251         dccp_hashinfo.bind_bucket_cachep = NULL;
1252         dccp_hashinfo.bind2_bucket_cachep = NULL;
1253         return rc;
1254 }
1255
1256 static void __exit dccp_fini(void)
1257 {
1258         int bhash_order = get_order(dccp_hashinfo.bhash_size *
1259                                     sizeof(struct inet_bind_hashbucket));
1260
1261         ccid_cleanup_builtins();
1262         dccp_mib_exit();
1263         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1264         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1265         free_pages((unsigned long)dccp_hashinfo.ehash,
1266                    get_order((dccp_hashinfo.ehash_mask + 1) *
1267                              sizeof(struct inet_ehash_bucket)));
1268         inet_ehash_locks_free(&dccp_hashinfo);
1269         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1270         dccp_ackvec_exit();
1271         dccp_sysctl_exit();
1272         inet_hashinfo2_free_mod(&dccp_hashinfo);
1273 }
1274
/* Module entry/exit points and descriptive metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");