Merge tag 'trace-v6.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[platform/kernel/linux-starfive.git] / net / dccp / proto.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  net/dccp/proto.c
4  *
5  *  An implementation of the DCCP protocol
6  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7  */
8
9 #include <linux/dccp.h>
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/if_arp.h>
18 #include <linux/init.h>
19 #include <linux/random.h>
20 #include <linux/slab.h>
21 #include <net/checksum.h>
22
23 #include <net/inet_sock.h>
24 #include <net/inet_common.h>
25 #include <net/sock.h>
26 #include <net/xfrm.h>
27
28 #include <asm/ioctls.h>
29 #include <linux/spinlock.h>
30 #include <linux/timer.h>
31 #include <linux/delay.h>
32 #include <linux/poll.h>
33
34 #include "ccid.h"
35 #include "dccp.h"
36 #include "feat.h"
37
38 #define CREATE_TRACE_POINTS
39 #include "trace.h"
40
/*
 * DCCP MIB counter storage (presumably what DCCP_INC_STATS() and
 * DCCP_DEC_STATS() update - confirm against their definitions).
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Per-CPU counter of orphaned DCCP sockets (users are outside this file section). */
DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);

/* inet hash table state shared by the DCCP protocol modules. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * Copied into dccps_tx_qlen of every new socket by dccp_init_sock().
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
55 static const char *dccp_state_name(const int state)
56 {
57         static const char *const dccp_state_names[] = {
58         [DCCP_OPEN]             = "OPEN",
59         [DCCP_REQUESTING]       = "REQUESTING",
60         [DCCP_PARTOPEN]         = "PARTOPEN",
61         [DCCP_LISTEN]           = "LISTEN",
62         [DCCP_RESPOND]          = "RESPOND",
63         [DCCP_CLOSING]          = "CLOSING",
64         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
65         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
66         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67         [DCCP_TIME_WAIT]        = "TIME_WAIT",
68         [DCCP_CLOSED]           = "CLOSED",
69         };
70
71         if (state >= DCCP_MAX_STATES)
72                 return "INVALID STATE!";
73         else
74                 return dccp_state_names[state];
75 }
76 #endif
77
/*
 * dccp_set_state - move @sk into @state and keep the MIB counters in step
 * @sk: socket whose state changes
 * @state: state being entered
 *
 * Warns if @state equals the current state. Entering DCCP_CLOSED unhashes
 * the socket and, unless the port is locked by SOCK_BINDPORT_LOCK, releases
 * the bound port. The new state is stored last, via inet_sk_set_state().
 */
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                /* A connection becomes established: bump CURRESTAB. */
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                /* Client retransmits all Confirm options until entering OPEN */
                if (oldstate == DCCP_PARTOPEN)
                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
                break;

        case DCCP_CLOSED:
                /* Closing from one of these states is counted as ESTABRESETS. */
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                /* Drop the bind bucket unless the port is locked (SOCK_BINDPORT_LOCK). */
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* CLOSED shares the CURRESTAB bookkeeping of every non-OPEN target. */
                fallthrough;
        default:
                /* Leaving OPEN for any other state: one fewer established connection. */
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 static void dccp_sk_destruct(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180         inet_sock_destruct(sk);
181 }
182
183 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
184 {
185         struct dccp_sock *dp = dccp_sk(sk);
186         struct inet_connection_sock *icsk = inet_csk(sk);
187
188         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
189         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
190         sk->sk_state            = DCCP_CLOSED;
191         sk->sk_write_space      = dccp_write_space;
192         sk->sk_destruct         = dccp_sk_destruct;
193         icsk->icsk_sync_mss     = dccp_sync_mss;
194         dp->dccps_mss_cache     = 536;
195         dp->dccps_rate_last     = jiffies;
196         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
197         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
198         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
199
200         dccp_init_xmit_timers(sk);
201
202         INIT_LIST_HEAD(&dp->dccps_featneg);
203         /* control socket doesn't need feat nego */
204         if (likely(ctl_sock_initialized))
205                 return dccp_feat_init(sk);
206         return 0;
207 }
208
209 EXPORT_SYMBOL_GPL(dccp_init_sock);
210
211 void dccp_destroy_sock(struct sock *sk)
212 {
213         struct dccp_sock *dp = dccp_sk(sk);
214
215         __skb_queue_purge(&sk->sk_write_queue);
216         if (sk->sk_send_head != NULL) {
217                 kfree_skb(sk->sk_send_head);
218                 sk->sk_send_head = NULL;
219         }
220
221         /* Clean up a referenced DCCP bind bucket. */
222         if (inet_csk(sk)->icsk_bind_hash != NULL)
223                 inet_put_port(sk);
224
225         kfree(dp->dccps_service_list);
226         dp->dccps_service_list = NULL;
227
228         if (dp->dccps_hc_rx_ackvec != NULL) {
229                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
230                 dp->dccps_hc_rx_ackvec = NULL;
231         }
232         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
233         dp->dccps_hc_rx_ccid = NULL;
234
235         /* clean up feature negotiation state */
236         dccp_feat_list_purge(&dp->dccps_featneg);
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240
241 static inline int dccp_need_reset(int state)
242 {
243         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
244                state != DCCP_REQUESTING;
245 }
246
/*
 * dccp_disconnect - abort the connection on @sk and reset it for reuse
 * @sk: socket to disconnect
 * @flags: not used by this function
 *
 * Moves the socket to DCCP_CLOSED, stops listening or sends a Reset
 * depending on the previous state, then clears timers, the RX CCID, the
 * queues, the destination port and the cached route. Always returns 0;
 * the error set in sk_err is reported via sk_error_report().
 */
int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct dccp_sock *dp = dccp_sk(sk);
        /* Sampled before the transition: the branches below act on the previous state. */
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                /* No Reset is sent while still requesting, but the error is recorded. */
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        dp->dccps_hc_rx_ccid = NULL;

        /* Discard all data still queued in either direction. */
        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->inet_dport = 0;

        inet_bhash2_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        /* A socket that still has a local port must still hold its bind bucket. */
        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

        sk_error_report(sk);
        return 0;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
298
/*
 *      Wait for a DCCP event.
 *
 *      Returns the poll mask for @sock->sk. For a listening socket the
 *      result of inet_csk_listen_poll() is returned unchanged.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        __poll_t mask;
        struct sock *sk = sock->sk;

        sock_poll_wait(file, sock, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by the poll logic, and correct handling of state changes
           made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = EPOLLERR;

        /* Hang-up: both directions shut down, or the socket is closed. */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= EPOLLHUP;
        /* Receive side shut down: report readable plus EPOLLRDHUP. */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

        /* Connected? (any state other than REQUESTING or RESPOND) */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable as soon as any receive memory is charged to the socket. */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= EPOLLIN | EPOLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_is_writeable(sk)) {
                                mask |= EPOLLOUT | EPOLLWRNORM;
                        } else {  /* send SIGIO later */
                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_is_writeable(sk))
                                        mask |= EPOLLOUT | EPOLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
355
356 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
357 {
358         int rc = -ENOTCONN;
359
360         lock_sock(sk);
361
362         if (sk->sk_state == DCCP_LISTEN)
363                 goto out;
364
365         switch (cmd) {
366         case SIOCOUTQ: {
367                 int amount = sk_wmem_alloc_get(sk);
368                 /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
369                  * always 0, comparably to UDP.
370                  */
371
372                 rc = put_user(amount, (int __user *)arg);
373         }
374                 break;
375         case SIOCINQ: {
376                 struct sk_buff *skb;
377                 unsigned long amount = 0;
378
379                 skb = skb_peek(&sk->sk_receive_queue);
380                 if (skb != NULL) {
381                         /*
382                          * We will only return the amount of this packet since
383                          * that is all that will be read.
384                          */
385                         amount = skb->len;
386                 }
387                 rc = put_user(amount, (int __user *)arg);
388         }
389                 break;
390         default:
391                 rc = -ENOIOCTLCMD;
392                 break;
393         }
394 out:
395         release_sock(sk);
396         return rc;
397 }
398
399 EXPORT_SYMBOL_GPL(dccp_ioctl);
400
401 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
402                                    sockptr_t optval, unsigned int optlen)
403 {
404         struct dccp_sock *dp = dccp_sk(sk);
405         struct dccp_service_list *sl = NULL;
406
407         if (service == DCCP_SERVICE_INVALID_VALUE ||
408             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
409                 return -EINVAL;
410
411         if (optlen > sizeof(service)) {
412                 sl = kmalloc(optlen, GFP_KERNEL);
413                 if (sl == NULL)
414                         return -ENOMEM;
415
416                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
417                 if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
418                                 sizeof(service), optlen - sizeof(service)) ||
419                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
420                         kfree(sl);
421                         return -EFAULT;
422                 }
423         }
424
425         lock_sock(sk);
426         dp->dccps_service = service;
427
428         kfree(dp->dccps_service_list);
429
430         dp->dccps_service_list = sl;
431         release_sock(sk);
432         return 0;
433 }
434
435 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
436 {
437         u8 *list, len;
438         int i, rc;
439
440         if (cscov < 0 || cscov > 15)
441                 return -EINVAL;
442         /*
443          * Populate a list of permissible values, in the range cscov...15. This
444          * is necessary since feature negotiation of single values only works if
445          * both sides incidentally choose the same value. Since the list starts
446          * lowest-value first, negotiation will pick the smallest shared value.
447          */
448         if (cscov == 0)
449                 return 0;
450         len = 16 - cscov;
451
452         list = kmalloc(len, GFP_KERNEL);
453         if (list == NULL)
454                 return -ENOBUFS;
455
456         for (i = 0; i < len; i++)
457                 list[i] = cscov++;
458
459         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
460
461         if (rc == 0) {
462                 if (rx)
463                         dccp_sk(sk)->dccps_pcrlen = cscov;
464                 else
465                         dccp_sk(sk)->dccps_pcslen = cscov;
466         }
467         kfree(list);
468         return rc;
469 }
470
471 static int dccp_setsockopt_ccid(struct sock *sk, int type,
472                                 sockptr_t optval, unsigned int optlen)
473 {
474         u8 *val;
475         int rc = 0;
476
477         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
478                 return -EINVAL;
479
480         val = memdup_sockptr(optval, optlen);
481         if (IS_ERR(val))
482                 return PTR_ERR(val);
483
484         lock_sock(sk);
485         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
486                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
487
488         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
489                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
490         release_sock(sk);
491
492         kfree(val);
493         return rc;
494 }
495
496 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
497                 sockptr_t optval, unsigned int optlen)
498 {
499         struct dccp_sock *dp = dccp_sk(sk);
500         int val, err = 0;
501
502         switch (optname) {
503         case DCCP_SOCKOPT_PACKET_SIZE:
504                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
505                 return 0;
506         case DCCP_SOCKOPT_CHANGE_L:
507         case DCCP_SOCKOPT_CHANGE_R:
508                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
509                 return 0;
510         case DCCP_SOCKOPT_CCID:
511         case DCCP_SOCKOPT_RX_CCID:
512         case DCCP_SOCKOPT_TX_CCID:
513                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
514         }
515
516         if (optlen < (int)sizeof(int))
517                 return -EINVAL;
518
519         if (copy_from_sockptr(&val, optval, sizeof(int)))
520                 return -EFAULT;
521
522         if (optname == DCCP_SOCKOPT_SERVICE)
523                 return dccp_setsockopt_service(sk, val, optval, optlen);
524
525         lock_sock(sk);
526         switch (optname) {
527         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
528                 if (dp->dccps_role != DCCP_ROLE_SERVER)
529                         err = -EOPNOTSUPP;
530                 else
531                         dp->dccps_server_timewait = (val != 0);
532                 break;
533         case DCCP_SOCKOPT_SEND_CSCOV:
534                 err = dccp_setsockopt_cscov(sk, val, false);
535                 break;
536         case DCCP_SOCKOPT_RECV_CSCOV:
537                 err = dccp_setsockopt_cscov(sk, val, true);
538                 break;
539         case DCCP_SOCKOPT_QPOLICY_ID:
540                 if (sk->sk_state != DCCP_CLOSED)
541                         err = -EISCONN;
542                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
543                         err = -EINVAL;
544                 else
545                         dp->dccps_qpolicy = val;
546                 break;
547         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
548                 if (val < 0)
549                         err = -EINVAL;
550                 else
551                         dp->dccps_tx_qlen = val;
552                 break;
553         default:
554                 err = -ENOPROTOOPT;
555                 break;
556         }
557         release_sock(sk);
558
559         return err;
560 }
561
562 int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
563                     unsigned int optlen)
564 {
565         if (level != SOL_DCCP)
566                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
567                                                              optname, optval,
568                                                              optlen);
569         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
570 }
571
572 EXPORT_SYMBOL_GPL(dccp_setsockopt);
573
574 static int dccp_getsockopt_service(struct sock *sk, int len,
575                                    __be32 __user *optval,
576                                    int __user *optlen)
577 {
578         const struct dccp_sock *dp = dccp_sk(sk);
579         const struct dccp_service_list *sl;
580         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
581
582         lock_sock(sk);
583         if ((sl = dp->dccps_service_list) != NULL) {
584                 slen = sl->dccpsl_nr * sizeof(u32);
585                 total_len += slen;
586         }
587
588         err = -EINVAL;
589         if (total_len > len)
590                 goto out;
591
592         err = 0;
593         if (put_user(total_len, optlen) ||
594             put_user(dp->dccps_service, optval) ||
595             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
596                 err = -EFAULT;
597 out:
598         release_sock(sk);
599         return err;
600 }
601
602 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
603                     char __user *optval, int __user *optlen)
604 {
605         struct dccp_sock *dp;
606         int val, len;
607
608         if (get_user(len, optlen))
609                 return -EFAULT;
610
611         if (len < (int)sizeof(int))
612                 return -EINVAL;
613
614         dp = dccp_sk(sk);
615
616         switch (optname) {
617         case DCCP_SOCKOPT_PACKET_SIZE:
618                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
619                 return 0;
620         case DCCP_SOCKOPT_SERVICE:
621                 return dccp_getsockopt_service(sk, len,
622                                                (__be32 __user *)optval, optlen);
623         case DCCP_SOCKOPT_GET_CUR_MPS:
624                 val = dp->dccps_mss_cache;
625                 break;
626         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
627                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
628         case DCCP_SOCKOPT_TX_CCID:
629                 val = ccid_get_current_tx_ccid(dp);
630                 if (val < 0)
631                         return -ENOPROTOOPT;
632                 break;
633         case DCCP_SOCKOPT_RX_CCID:
634                 val = ccid_get_current_rx_ccid(dp);
635                 if (val < 0)
636                         return -ENOPROTOOPT;
637                 break;
638         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
639                 val = dp->dccps_server_timewait;
640                 break;
641         case DCCP_SOCKOPT_SEND_CSCOV:
642                 val = dp->dccps_pcslen;
643                 break;
644         case DCCP_SOCKOPT_RECV_CSCOV:
645                 val = dp->dccps_pcrlen;
646                 break;
647         case DCCP_SOCKOPT_QPOLICY_ID:
648                 val = dp->dccps_qpolicy;
649                 break;
650         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
651                 val = dp->dccps_tx_qlen;
652                 break;
653         case 128 ... 191:
654                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
655                                              len, (u32 __user *)optval, optlen);
656         case 192 ... 255:
657                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
658                                              len, (u32 __user *)optval, optlen);
659         default:
660                 return -ENOPROTOOPT;
661         }
662
663         len = sizeof(val);
664         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
665                 return -EFAULT;
666
667         return 0;
668 }
669
670 int dccp_getsockopt(struct sock *sk, int level, int optname,
671                     char __user *optval, int __user *optlen)
672 {
673         if (level != SOL_DCCP)
674                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
675                                                              optname, optval,
676                                                              optlen);
677         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
678 }
679
680 EXPORT_SYMBOL_GPL(dccp_getsockopt);
681
682 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
683 {
684         struct cmsghdr *cmsg;
685
686         /*
687          * Assign an (opaque) qpolicy priority value to skb->priority.
688          *
689          * We are overloading this skb field for use with the qpolicy subystem.
690          * The skb->priority is normally used for the SO_PRIORITY option, which
691          * is initialised from sk_priority. Since the assignment of sk_priority
692          * to skb->priority happens later (on layer 3), we overload this field
693          * for use with queueing priorities as long as the skb is on layer 4.
694          * The default priority value (if nothing is set) is 0.
695          */
696         skb->priority = 0;
697
698         for_each_cmsghdr(cmsg, msg) {
699                 if (!CMSG_OK(msg, cmsg))
700                         return -EINVAL;
701
702                 if (cmsg->cmsg_level != SOL_DCCP)
703                         continue;
704
705                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
706                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
707                         return -EINVAL;
708
709                 switch (cmsg->cmsg_type) {
710                 case DCCP_SCM_PRIORITY:
711                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
712                                 return -EINVAL;
713                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
714                         break;
715                 default:
716                         return -EINVAL;
717                 }
718         }
719         return 0;
720 }
721
722 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
723 {
724         const struct dccp_sock *dp = dccp_sk(sk);
725         const int flags = msg->msg_flags;
726         const int noblock = flags & MSG_DONTWAIT;
727         struct sk_buff *skb;
728         int rc, size;
729         long timeo;
730
731         trace_dccp_probe(sk, len);
732
733         if (len > dp->dccps_mss_cache)
734                 return -EMSGSIZE;
735
736         lock_sock(sk);
737
738         timeo = sock_sndtimeo(sk, noblock);
739
740         /*
741          * We have to use sk_stream_wait_connect here to set sk_write_pending,
742          * so that the trick in dccp_rcv_request_sent_state_process.
743          */
744         /* Wait for a connection to finish. */
745         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
746                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
747                         goto out_release;
748
749         size = sk->sk_prot->max_header + len;
750         release_sock(sk);
751         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
752         lock_sock(sk);
753         if (skb == NULL)
754                 goto out_release;
755
756         if (dccp_qpolicy_full(sk)) {
757                 rc = -EAGAIN;
758                 goto out_discard;
759         }
760
761         if (sk->sk_state == DCCP_CLOSED) {
762                 rc = -ENOTCONN;
763                 goto out_discard;
764         }
765
766         skb_reserve(skb, sk->sk_prot->max_header);
767         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
768         if (rc != 0)
769                 goto out_discard;
770
771         rc = dccp_msghdr_parse(msg, skb);
772         if (rc != 0)
773                 goto out_discard;
774
775         dccp_qpolicy_push(sk, skb);
776         /*
777          * The xmit_timer is set if the TX CCID is rate-based and will expire
778          * when congestion control permits to release further packets into the
779          * network. Window-based CCIDs do not use this timer.
780          */
781         if (!timer_pending(&dp->dccps_xmit_timer))
782                 dccp_write_xmit(sk);
783 out_release:
784         release_sock(sk);
785         return rc ? : len;
786 out_discard:
787         kfree_skb(skb);
788         goto out_release;
789 }
790
791 EXPORT_SYMBOL_GPL(dccp_sendmsg);
792
/*
 * dccp_recvmsg - receive one datagram from @sk
 * @sk: receiving socket
 * @msg: destination message
 * @len: size of the destination buffer in bytes
 * @flags: MSG_DONTWAIT, MSG_PEEK and MSG_TRUNC are examined here
 * @addr_len: not used by this function
 *
 * Returns the number of bytes copied (the full packet length if MSG_TRUNC
 * is set in @flags), 0 when a Close, CloseReq or Reset packet is found or
 * the receive side is done/shut down, or a negative errno. Note that @len
 * is a size_t that is also used to carry negative errno values; it is
 * converted back by the int return type.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
                 int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        do {
                /* Only the head of the receive queue is ever examined. */
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* Peeking must not advance the close handshake. */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        fallthrough;
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* Any other packet type is dropped from the queue. */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb);
                }
verify_sock_status:
                /* Nothing deliverable: decide whether to return or wait. */
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* Non-blocking, or the receive timeout has run out. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo, NULL);
                continue;
        found_ok_skb:
                /* Clamp to the packet size; flag truncation if the buffer is smaller. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_msg(skb, 0, msg, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
                /* With MSG_TRUNC the caller gets the full packet length. */
                if (flags & MSG_TRUNC)
                        len = skb->len;
        found_fin_ok:
                /* A peeked packet stays on the queue. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
900
901 int inet_dccp_listen(struct socket *sock, int backlog)
902 {
903         struct sock *sk = sock->sk;
904         unsigned char old_state;
905         int err;
906
907         lock_sock(sk);
908
909         err = -EINVAL;
910         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
911                 goto out;
912
913         old_state = sk->sk_state;
914         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
915                 goto out;
916
917         WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
918         /* Really, if the socket is already in listen state
919          * we can only allow the backlog to be adjusted.
920          */
921         if (old_state != DCCP_LISTEN) {
922                 struct dccp_sock *dp = dccp_sk(sk);
923
924                 dp->dccps_role = DCCP_ROLE_LISTEN;
925
926                 /* do not start to listen if feature negotiation setup fails */
927                 if (dccp_feat_finalise_settings(dp)) {
928                         err = -EPROTO;
929                         goto out;
930                 }
931
932                 err = inet_csk_listen_start(sk);
933                 if (err)
934                         goto out;
935         }
936         err = 0;
937
938 out:
939         release_sock(sk);
940         return err;
941 }
942
943 EXPORT_SYMBOL_GPL(inet_dccp_listen);
944
945 static void dccp_terminate_connection(struct sock *sk)
946 {
947         u8 next_state = DCCP_CLOSED;
948
949         switch (sk->sk_state) {
950         case DCCP_PASSIVE_CLOSE:
951         case DCCP_PASSIVE_CLOSEREQ:
952                 dccp_finish_passive_close(sk);
953                 break;
954         case DCCP_PARTOPEN:
955                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
956                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
957                 fallthrough;
958         case DCCP_OPEN:
959                 dccp_send_close(sk, 1);
960
961                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
962                     !dccp_sk(sk)->dccps_server_timewait)
963                         next_state = DCCP_ACTIVE_CLOSEREQ;
964                 else
965                         next_state = DCCP_CLOSING;
966                 fallthrough;
967         default:
968                 dccp_set_state(sk, next_state);
969         }
970 }
971
972 void dccp_close(struct sock *sk, long timeout)
973 {
974         struct dccp_sock *dp = dccp_sk(sk);
975         struct sk_buff *skb;
976         u32 data_was_unread = 0;
977         int state;
978
979         lock_sock(sk);
980
981         sk->sk_shutdown = SHUTDOWN_MASK;
982
983         if (sk->sk_state == DCCP_LISTEN) {
984                 dccp_set_state(sk, DCCP_CLOSED);
985
986                 /* Special case. */
987                 inet_csk_listen_stop(sk);
988
989                 goto adjudge_to_death;
990         }
991
992         sk_stop_timer(sk, &dp->dccps_xmit_timer);
993
994         /*
995          * We need to flush the recv. buffs.  We do this only on the
996          * descriptor close, not protocol-sourced closes, because the
997           *reader process may not have drained the data yet!
998          */
999         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1000                 data_was_unread += skb->len;
1001                 __kfree_skb(skb);
1002         }
1003
1004         /* If socket has been already reset kill it. */
1005         if (sk->sk_state == DCCP_CLOSED)
1006                 goto adjudge_to_death;
1007
1008         if (data_was_unread) {
1009                 /* Unread data was tossed, send an appropriate Reset Code */
1010                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1011                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1012                 dccp_set_state(sk, DCCP_CLOSED);
1013         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1014                 /* Check zero linger _after_ checking for unread data. */
1015                 sk->sk_prot->disconnect(sk, 0);
1016         } else if (sk->sk_state != DCCP_CLOSED) {
1017                 /*
1018                  * Normal connection termination. May need to wait if there are
1019                  * still packets in the TX queue that are delayed by the CCID.
1020                  */
1021                 dccp_flush_write_queue(sk, &timeout);
1022                 dccp_terminate_connection(sk);
1023         }
1024
1025         /*
1026          * Flush write queue. This may be necessary in several cases:
1027          * - we have been closed by the peer but still have application data;
1028          * - abortive termination (unread data or zero linger time),
1029          * - normal termination but queue could not be flushed within time limit
1030          */
1031         __skb_queue_purge(&sk->sk_write_queue);
1032
1033         sk_stream_wait_close(sk, timeout);
1034
1035 adjudge_to_death:
1036         state = sk->sk_state;
1037         sock_hold(sk);
1038         sock_orphan(sk);
1039
1040         /*
1041          * It is the last release_sock in its life. It will remove backlog.
1042          */
1043         release_sock(sk);
1044         /*
1045          * Now socket is owned by kernel and we acquire BH lock
1046          * to finish close. No need to check for user refs.
1047          */
1048         local_bh_disable();
1049         bh_lock_sock(sk);
1050         WARN_ON(sock_owned_by_user(sk));
1051
1052         this_cpu_inc(dccp_orphan_count);
1053
1054         /* Have we already been destroyed by a softirq or backlog? */
1055         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1056                 goto out;
1057
1058         if (sk->sk_state == DCCP_CLOSED)
1059                 inet_csk_destroy_sock(sk);
1060
1061         /* Otherwise, socket is reprieved until protocol close. */
1062
1063 out:
1064         bh_unlock_sock(sk);
1065         local_bh_enable();
1066         sock_put(sk);
1067 }
1068
1069 EXPORT_SYMBOL_GPL(dccp_close);
1070
/**
 * dccp_shutdown - shutdown handler for DCCP sockets
 * @sk: socket the shutdown was requested on (not used here)
 * @how: shutdown mode requested by the caller
 *
 * Currently only emits a debug message; no socket state is changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1077
1078 static inline int __init dccp_mib_init(void)
1079 {
1080         dccp_statistics = alloc_percpu(struct dccp_mib);
1081         if (!dccp_statistics)
1082                 return -ENOMEM;
1083         return 0;
1084 }
1085
1086 static inline void dccp_mib_exit(void)
1087 {
1088         free_percpu(dccp_statistics);
1089 }
1090
1091 static int thash_entries;
1092 module_param(thash_entries, int, 0444);
1093 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1094
1095 #ifdef CONFIG_IP_DCCP_DEBUG
1096 bool dccp_debug;
1097 module_param(dccp_debug, bool, 0644);
1098 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1099
1100 EXPORT_SYMBOL_GPL(dccp_debug);
1101 #endif
1102
1103 static int __init dccp_init(void)
1104 {
1105         unsigned long goal;
1106         unsigned long nr_pages = totalram_pages();
1107         int ehash_order, bhash_order, i;
1108         int rc;
1109
1110         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1111                      sizeof_field(struct sk_buff, cb));
1112         rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
1113         if (rc)
1114                 goto out_fail;
1115         rc = -ENOBUFS;
1116         dccp_hashinfo.bind_bucket_cachep =
1117                 kmem_cache_create("dccp_bind_bucket",
1118                                   sizeof(struct inet_bind_bucket), 0,
1119                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1120         if (!dccp_hashinfo.bind_bucket_cachep)
1121                 goto out_free_hashinfo2;
1122         dccp_hashinfo.bind2_bucket_cachep =
1123                 kmem_cache_create("dccp_bind2_bucket",
1124                                   sizeof(struct inet_bind2_bucket), 0,
1125                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1126         if (!dccp_hashinfo.bind2_bucket_cachep)
1127                 goto out_free_bind_bucket_cachep;
1128
1129         /*
1130          * Size and allocate the main established and bind bucket
1131          * hash tables.
1132          *
1133          * The methodology is similar to that of the buffer cache.
1134          */
1135         if (nr_pages >= (128 * 1024))
1136                 goal = nr_pages >> (21 - PAGE_SHIFT);
1137         else
1138                 goal = nr_pages >> (23 - PAGE_SHIFT);
1139
1140         if (thash_entries)
1141                 goal = (thash_entries *
1142                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1143         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1144                 ;
1145         do {
1146                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1147                                         sizeof(struct inet_ehash_bucket);
1148
1149                 while (hash_size & (hash_size - 1))
1150                         hash_size--;
1151                 dccp_hashinfo.ehash_mask = hash_size - 1;
1152                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1153                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1154         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1155
1156         if (!dccp_hashinfo.ehash) {
1157                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1158                 goto out_free_bind2_bucket_cachep;
1159         }
1160
1161         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1162                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1163
1164         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1165                         goto out_free_dccp_ehash;
1166
1167         bhash_order = ehash_order;
1168
1169         do {
1170                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1171                                         sizeof(struct inet_bind_hashbucket);
1172                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1173                     bhash_order > 0)
1174                         continue;
1175                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1176                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1177         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1178
1179         if (!dccp_hashinfo.bhash) {
1180                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1181                 goto out_free_dccp_locks;
1182         }
1183
1184         dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
1185                 __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
1186
1187         if (!dccp_hashinfo.bhash2) {
1188                 DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
1189                 goto out_free_dccp_bhash;
1190         }
1191
1192         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1193                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1194                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1195                 spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
1196                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
1197         }
1198
1199         dccp_hashinfo.pernet = false;
1200
1201         rc = dccp_mib_init();
1202         if (rc)
1203                 goto out_free_dccp_bhash2;
1204
1205         rc = dccp_ackvec_init();
1206         if (rc)
1207                 goto out_free_dccp_mib;
1208
1209         rc = dccp_sysctl_init();
1210         if (rc)
1211                 goto out_ackvec_exit;
1212
1213         rc = ccid_initialize_builtins();
1214         if (rc)
1215                 goto out_sysctl_exit;
1216
1217         dccp_timestamping_init();
1218
1219         return 0;
1220
1221 out_sysctl_exit:
1222         dccp_sysctl_exit();
1223 out_ackvec_exit:
1224         dccp_ackvec_exit();
1225 out_free_dccp_mib:
1226         dccp_mib_exit();
1227 out_free_dccp_bhash2:
1228         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1229 out_free_dccp_bhash:
1230         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1231 out_free_dccp_locks:
1232         inet_ehash_locks_free(&dccp_hashinfo);
1233 out_free_dccp_ehash:
1234         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1235 out_free_bind2_bucket_cachep:
1236         kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
1237 out_free_bind_bucket_cachep:
1238         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1239 out_free_hashinfo2:
1240         inet_hashinfo2_free_mod(&dccp_hashinfo);
1241 out_fail:
1242         dccp_hashinfo.bhash = NULL;
1243         dccp_hashinfo.bhash2 = NULL;
1244         dccp_hashinfo.ehash = NULL;
1245         dccp_hashinfo.bind_bucket_cachep = NULL;
1246         dccp_hashinfo.bind2_bucket_cachep = NULL;
1247         return rc;
1248 }
1249
1250 static void __exit dccp_fini(void)
1251 {
1252         int bhash_order = get_order(dccp_hashinfo.bhash_size *
1253                                     sizeof(struct inet_bind_hashbucket));
1254
1255         ccid_cleanup_builtins();
1256         dccp_mib_exit();
1257         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1258         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1259         free_pages((unsigned long)dccp_hashinfo.ehash,
1260                    get_order((dccp_hashinfo.ehash_mask + 1) *
1261                              sizeof(struct inet_ehash_bucket)));
1262         inet_ehash_locks_free(&dccp_hashinfo);
1263         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1264         dccp_ackvec_exit();
1265         dccp_sysctl_exit();
1266         inet_hashinfo2_free_mod(&dccp_hashinfo);
1267 }
1268
1269 module_init(dccp_init);
1270 module_exit(dccp_fini);
1271
1272 MODULE_LICENSE("GPL");
1273 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1274 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");