Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[platform/kernel/linux-starfive.git] / net / dccp / ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      DCCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Based on net/dccp6/ipv6.c
7  *
8  *      Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
9  */
10
11 #include <linux/module.h>
12 #include <linux/random.h>
13 #include <linux/slab.h>
14 #include <linux/xfrm.h>
15 #include <linux/string.h>
16
17 #include <net/addrconf.h>
18 #include <net/inet_common.h>
19 #include <net/inet_hashtables.h>
20 #include <net/inet_sock.h>
21 #include <net/inet6_connection_sock.h>
22 #include <net/inet6_hashtables.h>
23 #include <net/ip6_route.h>
24 #include <net/ipv6.h>
25 #include <net/protocol.h>
26 #include <net/transp_v6.h>
27 #include <net/ip6_checksum.h>
28 #include <net/xfrm.h>
29 #include <net/secure_seq.h>
30 #include <net/netns/generic.h>
31 #include <net/sock.h>
32
33 #include "dccp.h"
34 #include "ipv6.h"
35 #include "feat.h"
36
37 struct dccp_v6_pernet {
38         struct sock *v6_ctl_sk;
39 };
40
41 static unsigned int dccp_v6_pernet_id __read_mostly;
42
43 /* The per-net v6_ctl_sk is used for sending RSTs and ACKs */
44
45 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
46 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
47
48 /* add pseudo-header to DCCP checksum stored in skb->csum */
49 static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
50                                       const struct in6_addr *saddr,
51                                       const struct in6_addr *daddr)
52 {
53         return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
54 }
55
56 static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
57 {
58         struct ipv6_pinfo *np = inet6_sk(sk);
59         struct dccp_hdr *dh = dccp_hdr(skb);
60
61         dccp_csum_outgoing(skb);
62         dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
63 }
64
65 static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
66 {
67         return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
68                                              ipv6_hdr(skb)->saddr.s6_addr32,
69                                              dccp_hdr(skb)->dccph_dport,
70                                              dccp_hdr(skb)->dccph_sport     );
71
72 }
73
74 static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75                         u8 type, u8 code, int offset, __be32 info)
76 {
77         const struct ipv6hdr *hdr;
78         const struct dccp_hdr *dh;
79         struct dccp_sock *dp;
80         struct ipv6_pinfo *np;
81         struct sock *sk;
82         int err;
83         __u64 seq;
84         struct net *net = dev_net(skb->dev);
85
86         /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
87          * which is in byte 7 of the dccp header.
88          * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
89          *
90          * Later on, we want to access the sequence number fields, which are
91          * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
92          */
93         dh = (struct dccp_hdr *)(skb->data + offset);
94         if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
95                 return -EINVAL;
96         hdr = (const struct ipv6hdr *)skb->data;
97         dh = (struct dccp_hdr *)(skb->data + offset);
98
99         sk = __inet6_lookup_established(net, &dccp_hashinfo,
100                                         &hdr->daddr, dh->dccph_dport,
101                                         &hdr->saddr, ntohs(dh->dccph_sport),
102                                         inet6_iif(skb), 0);
103
104         if (!sk) {
105                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
106                                   ICMP6_MIB_INERRORS);
107                 return -ENOENT;
108         }
109
110         if (sk->sk_state == DCCP_TIME_WAIT) {
111                 inet_twsk_put(inet_twsk(sk));
112                 return 0;
113         }
114         seq = dccp_hdr_seq(dh);
115         if (sk->sk_state == DCCP_NEW_SYN_RECV) {
116                 dccp_req_err(sk, seq);
117                 return 0;
118         }
119
120         bh_lock_sock(sk);
121         if (sock_owned_by_user(sk))
122                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
123
124         if (sk->sk_state == DCCP_CLOSED)
125                 goto out;
126
127         dp = dccp_sk(sk);
128         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
129             !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
130                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
131                 goto out;
132         }
133
134         np = inet6_sk(sk);
135
136         if (type == NDISC_REDIRECT) {
137                 if (!sock_owned_by_user(sk)) {
138                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
139
140                         if (dst)
141                                 dst->ops->redirect(dst, sk, skb);
142                 }
143                 goto out;
144         }
145
146         if (type == ICMPV6_PKT_TOOBIG) {
147                 struct dst_entry *dst = NULL;
148
149                 if (!ip6_sk_accept_pmtu(sk))
150                         goto out;
151
152                 if (sock_owned_by_user(sk))
153                         goto out;
154                 if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
155                         goto out;
156
157                 dst = inet6_csk_update_pmtu(sk, ntohl(info));
158                 if (!dst)
159                         goto out;
160
161                 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
162                         dccp_sync_mss(sk, dst_mtu(dst));
163                 goto out;
164         }
165
166         icmpv6_err_convert(type, code, &err);
167
168         /* Might be for an request_sock */
169         switch (sk->sk_state) {
170         case DCCP_REQUESTING:
171         case DCCP_RESPOND:  /* Cannot happen.
172                                It can, it SYNs are crossed. --ANK */
173                 if (!sock_owned_by_user(sk)) {
174                         __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
175                         sk->sk_err = err;
176                         /*
177                          * Wake people up to see the error
178                          * (see connect in sock.c)
179                          */
180                         sk_error_report(sk);
181                         dccp_done(sk);
182                 } else {
183                         WRITE_ONCE(sk->sk_err_soft, err);
184                 }
185                 goto out;
186         }
187
188         if (!sock_owned_by_user(sk) && np->recverr) {
189                 sk->sk_err = err;
190                 sk_error_report(sk);
191         } else {
192                 WRITE_ONCE(sk->sk_err_soft, err);
193         }
194 out:
195         bh_unlock_sock(sk);
196         sock_put(sk);
197         return 0;
198 }
199
200
201 static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
202 {
203         struct inet_request_sock *ireq = inet_rsk(req);
204         struct ipv6_pinfo *np = inet6_sk(sk);
205         struct sk_buff *skb;
206         struct in6_addr *final_p, final;
207         struct flowi6 fl6;
208         int err = -1;
209         struct dst_entry *dst;
210
211         memset(&fl6, 0, sizeof(fl6));
212         fl6.flowi6_proto = IPPROTO_DCCP;
213         fl6.daddr = ireq->ir_v6_rmt_addr;
214         fl6.saddr = ireq->ir_v6_loc_addr;
215         fl6.flowlabel = 0;
216         fl6.flowi6_oif = ireq->ir_iif;
217         fl6.fl6_dport = ireq->ir_rmt_port;
218         fl6.fl6_sport = htons(ireq->ir_num);
219         security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
220
221
222         rcu_read_lock();
223         final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
224         rcu_read_unlock();
225
226         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
227         if (IS_ERR(dst)) {
228                 err = PTR_ERR(dst);
229                 dst = NULL;
230                 goto done;
231         }
232
233         skb = dccp_make_response(sk, dst, req);
234         if (skb != NULL) {
235                 struct dccp_hdr *dh = dccp_hdr(skb);
236                 struct ipv6_txoptions *opt;
237
238                 dh->dccph_checksum = dccp_v6_csum_finish(skb,
239                                                          &ireq->ir_v6_loc_addr,
240                                                          &ireq->ir_v6_rmt_addr);
241                 fl6.daddr = ireq->ir_v6_rmt_addr;
242                 rcu_read_lock();
243                 opt = ireq->ipv6_opt;
244                 if (!opt)
245                         opt = rcu_dereference(np->opt);
246                 err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
247                                np->tclass, sk->sk_priority);
248                 rcu_read_unlock();
249                 err = net_xmit_eval(err);
250         }
251
252 done:
253         dst_release(dst);
254         return err;
255 }
256
257 static void dccp_v6_reqsk_destructor(struct request_sock *req)
258 {
259         dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
260         kfree(inet_rsk(req)->ipv6_opt);
261         kfree_skb(inet_rsk(req)->pktopts);
262 }
263
264 static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
265 {
266         const struct ipv6hdr *rxip6h;
267         struct sk_buff *skb;
268         struct flowi6 fl6;
269         struct net *net = dev_net(skb_dst(rxskb)->dev);
270         struct dccp_v6_pernet *pn;
271         struct sock *ctl_sk;
272         struct dst_entry *dst;
273
274         if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
275                 return;
276
277         if (!ipv6_unicast_destination(rxskb))
278                 return;
279
280         pn = net_generic(net, dccp_v6_pernet_id);
281         ctl_sk = pn->v6_ctl_sk;
282         skb = dccp_ctl_make_reset(ctl_sk, rxskb);
283         if (skb == NULL)
284                 return;
285
286         rxip6h = ipv6_hdr(rxskb);
287         dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
288                                                             &rxip6h->daddr);
289
290         memset(&fl6, 0, sizeof(fl6));
291         fl6.daddr = rxip6h->saddr;
292         fl6.saddr = rxip6h->daddr;
293
294         fl6.flowi6_proto = IPPROTO_DCCP;
295         fl6.flowi6_oif = inet6_iif(rxskb);
296         fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
297         fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
298         security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6));
299
300         /* sk = NULL, but it is safe for now. RST socket required. */
301         dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
302         if (!IS_ERR(dst)) {
303                 skb_dst_set(skb, dst);
304                 ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
305                 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
306                 DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
307                 return;
308         }
309
310         kfree_skb(skb);
311 }
312
313 static struct request_sock_ops dccp6_request_sock_ops = {
314         .family         = AF_INET6,
315         .obj_size       = sizeof(struct dccp6_request_sock),
316         .rtx_syn_ack    = dccp_v6_send_response,
317         .send_ack       = dccp_reqsk_send_ack,
318         .destructor     = dccp_v6_reqsk_destructor,
319         .send_reset     = dccp_v6_ctl_send_reset,
320         .syn_ack_timeout = dccp_syn_ack_timeout,
321 };
322
323 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
324 {
325         struct request_sock *req;
326         struct dccp_request_sock *dreq;
327         struct inet_request_sock *ireq;
328         struct ipv6_pinfo *np = inet6_sk(sk);
329         const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
330         struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
331
332         if (skb->protocol == htons(ETH_P_IP))
333                 return dccp_v4_conn_request(sk, skb);
334
335         if (!ipv6_unicast_destination(skb))
336                 return 0;       /* discard, don't send a reset here */
337
338         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
339                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
340                 return 0;
341         }
342
343         if (dccp_bad_service_code(sk, service)) {
344                 dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
345                 goto drop;
346         }
347         /*
348          * There are no SYN attacks on IPv6, yet...
349          */
350         dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
351         if (inet_csk_reqsk_queue_is_full(sk))
352                 goto drop;
353
354         if (sk_acceptq_is_full(sk))
355                 goto drop;
356
357         req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
358         if (req == NULL)
359                 goto drop;
360
361         if (dccp_reqsk_init(req, dccp_sk(sk), skb))
362                 goto drop_and_free;
363
364         dreq = dccp_rsk(req);
365         if (dccp_parse_options(sk, dreq, skb))
366                 goto drop_and_free;
367
368         if (security_inet_conn_request(sk, skb, req))
369                 goto drop_and_free;
370
371         ireq = inet_rsk(req);
372         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
373         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
374         ireq->ireq_family = AF_INET6;
375         ireq->ir_mark = inet_request_mark(sk, skb);
376
377         if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
378             np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
379             np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
380                 refcount_inc(&skb->users);
381                 ireq->pktopts = skb;
382         }
383         ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
384
385         /* So that link locals have meaning */
386         if (!ireq->ir_iif &&
387             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
388                 ireq->ir_iif = inet6_iif(skb);
389
390         /*
391          * Step 3: Process LISTEN state
392          *
393          *   Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
394          *
395          * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child().
396          */
397         dreq->dreq_isr     = dcb->dccpd_seq;
398         dreq->dreq_gsr     = dreq->dreq_isr;
399         dreq->dreq_iss     = dccp_v6_init_sequence(skb);
400         dreq->dreq_gss     = dreq->dreq_iss;
401         dreq->dreq_service = service;
402
403         if (dccp_v6_send_response(sk, req))
404                 goto drop_and_free;
405
406         inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
407         reqsk_put(req);
408         return 0;
409
410 drop_and_free:
411         reqsk_free(req);
412 drop:
413         __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
414         return -1;
415 }
416
417 static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
418                                               struct sk_buff *skb,
419                                               struct request_sock *req,
420                                               struct dst_entry *dst,
421                                               struct request_sock *req_unhash,
422                                               bool *own_req)
423 {
424         struct inet_request_sock *ireq = inet_rsk(req);
425         struct ipv6_pinfo *newnp;
426         const struct ipv6_pinfo *np = inet6_sk(sk);
427         struct ipv6_txoptions *opt;
428         struct inet_sock *newinet;
429         struct dccp6_sock *newdp6;
430         struct sock *newsk;
431
432         if (skb->protocol == htons(ETH_P_IP)) {
433                 /*
434                  *      v6 mapped
435                  */
436                 newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
437                                                   req_unhash, own_req);
438                 if (newsk == NULL)
439                         return NULL;
440
441                 newdp6 = (struct dccp6_sock *)newsk;
442                 newinet = inet_sk(newsk);
443                 newinet->pinet6 = &newdp6->inet6;
444                 newnp = inet6_sk(newsk);
445
446                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
447
448                 newnp->saddr = newsk->sk_v6_rcv_saddr;
449
450                 inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
451                 newsk->sk_backlog_rcv = dccp_v4_do_rcv;
452                 newnp->pktoptions  = NULL;
453                 newnp->opt         = NULL;
454                 newnp->ipv6_mc_list = NULL;
455                 newnp->ipv6_ac_list = NULL;
456                 newnp->ipv6_fl_list = NULL;
457                 newnp->mcast_oif   = inet_iif(skb);
458                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
459
460                 /*
461                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
462                  * here, dccp_create_openreq_child now does this for us, see the comment in
463                  * that function for the gory details. -acme
464                  */
465
466                 /* It is tricky place. Until this moment IPv4 tcp
467                    worked with IPv6 icsk.icsk_af_ops.
468                    Sync it now.
469                  */
470                 dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
471
472                 return newsk;
473         }
474
475
476         if (sk_acceptq_is_full(sk))
477                 goto out_overflow;
478
479         if (!dst) {
480                 struct flowi6 fl6;
481
482                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
483                 if (!dst)
484                         goto out;
485         }
486
487         newsk = dccp_create_openreq_child(sk, req, skb);
488         if (newsk == NULL)
489                 goto out_nonewsk;
490
491         /*
492          * No need to charge this sock to the relevant IPv6 refcnt debug socks
493          * count here, dccp_create_openreq_child now does this for us, see the
494          * comment in that function for the gory details. -acme
495          */
496
497         ip6_dst_store(newsk, dst, NULL, NULL);
498         newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
499                                                       NETIF_F_TSO);
500         newdp6 = (struct dccp6_sock *)newsk;
501         newinet = inet_sk(newsk);
502         newinet->pinet6 = &newdp6->inet6;
503         newnp = inet6_sk(newsk);
504
505         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
506
507         newsk->sk_v6_daddr      = ireq->ir_v6_rmt_addr;
508         newnp->saddr            = ireq->ir_v6_loc_addr;
509         newsk->sk_v6_rcv_saddr  = ireq->ir_v6_loc_addr;
510         newsk->sk_bound_dev_if  = ireq->ir_iif;
511
512         /* Now IPv6 options...
513
514            First: no IPv4 options.
515          */
516         newinet->inet_opt = NULL;
517
518         /* Clone RX bits */
519         newnp->rxopt.all = np->rxopt.all;
520
521         newnp->ipv6_mc_list = NULL;
522         newnp->ipv6_ac_list = NULL;
523         newnp->ipv6_fl_list = NULL;
524         newnp->pktoptions = NULL;
525         newnp->opt        = NULL;
526         newnp->mcast_oif  = inet6_iif(skb);
527         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
528
529         /*
530          * Clone native IPv6 options from listening socket (if any)
531          *
532          * Yes, keeping reference count would be much more clever, but we make
533          * one more one thing there: reattach optmem to newsk.
534          */
535         opt = ireq->ipv6_opt;
536         if (!opt)
537                 opt = rcu_dereference(np->opt);
538         if (opt) {
539                 opt = ipv6_dup_options(newsk, opt);
540                 RCU_INIT_POINTER(newnp->opt, opt);
541         }
542         inet_csk(newsk)->icsk_ext_hdr_len = 0;
543         if (opt)
544                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
545                                                     opt->opt_flen;
546
547         dccp_sync_mss(newsk, dst_mtu(dst));
548
549         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
550         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
551
552         if (__inet_inherit_port(sk, newsk) < 0) {
553                 inet_csk_prepare_forced_close(newsk);
554                 dccp_done(newsk);
555                 goto out;
556         }
557         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
558         /* Clone pktoptions received with SYN, if we own the req */
559         if (*own_req && ireq->pktopts) {
560                 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
561                 consume_skb(ireq->pktopts);
562                 ireq->pktopts = NULL;
563         }
564
565         return newsk;
566
567 out_overflow:
568         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
569 out_nonewsk:
570         dst_release(dst);
571 out:
572         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
573         return NULL;
574 }
575
576 /* The socket must have it's spinlock held when we get
577  * here.
578  *
579  * We have a potential double-lock case here, so even when
580  * doing backlog processing we use the BH locking scheme.
581  * This is because we cannot sleep with the original spinlock
582  * held.
583  */
584 static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
585 {
586         struct ipv6_pinfo *np = inet6_sk(sk);
587         struct sk_buff *opt_skb = NULL;
588
589         /* Imagine: socket is IPv6. IPv4 packet arrives,
590            goes to IPv4 receive handler and backlogged.
591            From backlog it always goes here. Kerboom...
592            Fortunately, dccp_rcv_established and rcv_established
593            handle them correctly, but it is not case with
594            dccp_v6_hnd_req and dccp_v6_ctl_send_reset().   --ANK
595          */
596
597         if (skb->protocol == htons(ETH_P_IP))
598                 return dccp_v4_do_rcv(sk, skb);
599
600         if (sk_filter(sk, skb))
601                 goto discard;
602
603         /*
604          * socket locking is here for SMP purposes as backlog rcv is currently
605          * called with bh processing disabled.
606          */
607
608         /* Do Stevens' IPV6_PKTOPTIONS.
609
610            Yes, guys, it is the only place in our code, where we
611            may make it not affecting IPv4.
612            The rest of code is protocol independent,
613            and I do not like idea to uglify IPv4.
614
615            Actually, all the idea behind IPV6_PKTOPTIONS
616            looks not very well thought. For now we latch
617            options, received in the last packet, enqueued
618            by tcp. Feel free to propose better solution.
619                                                --ANK (980728)
620          */
621         if (np->rxopt.all)
622                 opt_skb = skb_clone_and_charge_r(skb, sk);
623
624         if (sk->sk_state == DCCP_OPEN) { /* Fast path */
625                 if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
626                         goto reset;
627                 if (opt_skb)
628                         goto ipv6_pktoptions;
629                 return 0;
630         }
631
632         /*
633          *  Step 3: Process LISTEN state
634          *     If S.state == LISTEN,
635          *       If P.type == Request or P contains a valid Init Cookie option,
636          *            (* Must scan the packet's options to check for Init
637          *               Cookies.  Only Init Cookies are processed here,
638          *               however; other options are processed in Step 8.  This
639          *               scan need only be performed if the endpoint uses Init
640          *               Cookies *)
641          *            (* Generate a new socket and switch to that socket *)
642          *            Set S := new socket for this port pair
643          *            S.state = RESPOND
644          *            Choose S.ISS (initial seqno) or set from Init Cookies
645          *            Initialize S.GAR := S.ISS
646          *            Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
647          *            Continue with S.state == RESPOND
648          *            (* A Response packet will be generated in Step 11 *)
649          *       Otherwise,
650          *            Generate Reset(No Connection) unless P.type == Reset
651          *            Drop packet and return
652          *
653          * NOTE: the check for the packet types is done in
654          *       dccp_rcv_state_process
655          */
656
657         if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
658                 goto reset;
659         if (opt_skb)
660                 goto ipv6_pktoptions;
661         return 0;
662
663 reset:
664         dccp_v6_ctl_send_reset(sk, skb);
665 discard:
666         if (opt_skb != NULL)
667                 __kfree_skb(opt_skb);
668         kfree_skb(skb);
669         return 0;
670
671 /* Handling IPV6_PKTOPTIONS skb the similar
672  * way it's done for net/ipv6/tcp_ipv6.c
673  */
674 ipv6_pktoptions:
675         if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
676                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
677                         np->mcast_oif = inet6_iif(opt_skb);
678                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
679                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
680                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
681                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
682                 if (np->repflow)
683                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
684                 if (ipv6_opt_accepted(sk, opt_skb,
685                                       &DCCP_SKB_CB(opt_skb)->header.h6)) {
686                         memmove(IP6CB(opt_skb),
687                                 &DCCP_SKB_CB(opt_skb)->header.h6,
688                                 sizeof(struct inet6_skb_parm));
689                         opt_skb = xchg(&np->pktoptions, opt_skb);
690                 } else {
691                         __kfree_skb(opt_skb);
692                         opt_skb = xchg(&np->pktoptions, NULL);
693                 }
694         }
695
696         kfree_skb(opt_skb);
697         return 0;
698 }
699
700 static int dccp_v6_rcv(struct sk_buff *skb)
701 {
702         const struct dccp_hdr *dh;
703         bool refcounted;
704         struct sock *sk;
705         int min_cov;
706
707         /* Step 1: Check header basics */
708
709         if (dccp_invalid_packet(skb))
710                 goto discard_it;
711
712         /* Step 1: If header checksum is incorrect, drop packet and return. */
713         if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
714                                      &ipv6_hdr(skb)->daddr)) {
715                 DCCP_WARN("dropped packet with invalid checksum\n");
716                 goto discard_it;
717         }
718
719         dh = dccp_hdr(skb);
720
721         DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(dh);
722         DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
723
724         if (dccp_packet_without_ack(skb))
725                 DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
726         else
727                 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
728
729 lookup:
730         sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
731                                 dh->dccph_sport, dh->dccph_dport,
732                                 inet6_iif(skb), 0, &refcounted);
733         if (!sk) {
734                 dccp_pr_debug("failed to look up flow ID in table and "
735                               "get corresponding socket\n");
736                 goto no_dccp_socket;
737         }
738
739         /*
740          * Step 2:
741          *      ... or S.state == TIMEWAIT,
742          *              Generate Reset(No Connection) unless P.type == Reset
743          *              Drop packet and return
744          */
745         if (sk->sk_state == DCCP_TIME_WAIT) {
746                 dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
747                 inet_twsk_put(inet_twsk(sk));
748                 goto no_dccp_socket;
749         }
750
751         if (sk->sk_state == DCCP_NEW_SYN_RECV) {
752                 struct request_sock *req = inet_reqsk(sk);
753                 struct sock *nsk;
754
755                 sk = req->rsk_listener;
756                 if (unlikely(sk->sk_state != DCCP_LISTEN)) {
757                         inet_csk_reqsk_queue_drop_and_put(sk, req);
758                         goto lookup;
759                 }
760                 sock_hold(sk);
761                 refcounted = true;
762                 nsk = dccp_check_req(sk, skb, req);
763                 if (!nsk) {
764                         reqsk_put(req);
765                         goto discard_and_relse;
766                 }
767                 if (nsk == sk) {
768                         reqsk_put(req);
769                 } else if (dccp_child_process(sk, nsk, skb)) {
770                         dccp_v6_ctl_send_reset(sk, skb);
771                         goto discard_and_relse;
772                 } else {
773                         sock_put(sk);
774                         return 0;
775                 }
776         }
777         /*
778          * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
779          *      o if MinCsCov = 0, only packets with CsCov = 0 are accepted
780          *      o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
781          */
782         min_cov = dccp_sk(sk)->dccps_pcrlen;
783         if (dh->dccph_cscov  &&  (min_cov == 0 || dh->dccph_cscov < min_cov))  {
784                 dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
785                               dh->dccph_cscov, min_cov);
786                 /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
787                 goto discard_and_relse;
788         }
789
790         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
791                 goto discard_and_relse;
792         nf_reset_ct(skb);
793
794         return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
795                                 refcounted) ? -1 : 0;
796
797 no_dccp_socket:
798         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
799                 goto discard_it;
800         /*
801          * Step 2:
802          *      If no socket ...
803          *              Generate Reset(No Connection) unless P.type == Reset
804          *              Drop packet and return
805          */
806         if (dh->dccph_type != DCCP_PKT_RESET) {
807                 DCCP_SKB_CB(skb)->dccpd_reset_code =
808                                         DCCP_RESET_CODE_NO_CONNECTION;
809                 dccp_v6_ctl_send_reset(sk, skb);
810         }
811
812 discard_it:
813         kfree_skb(skb);
814         return 0;
815
816 discard_and_relse:
817         if (refcounted)
818                 sock_put(sk);
819         goto discard_it;
820 }
821
822 static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
823                            int addr_len)
824 {
825         struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
826         struct inet_connection_sock *icsk = inet_csk(sk);
827         struct inet_sock *inet = inet_sk(sk);
828         struct ipv6_pinfo *np = inet6_sk(sk);
829         struct dccp_sock *dp = dccp_sk(sk);
830         struct in6_addr *saddr = NULL, *final_p, final;
831         struct ipv6_txoptions *opt;
832         struct flowi6 fl6;
833         struct dst_entry *dst;
834         int addr_type;
835         int err;
836
837         dp->dccps_role = DCCP_ROLE_CLIENT;
838
839         if (addr_len < SIN6_LEN_RFC2133)
840                 return -EINVAL;
841
842         if (usin->sin6_family != AF_INET6)
843                 return -EAFNOSUPPORT;
844
845         memset(&fl6, 0, sizeof(fl6));
846
847         if (np->sndflow) {
848                 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
849                 IP6_ECN_flow_init(fl6.flowlabel);
850                 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
851                         struct ip6_flowlabel *flowlabel;
852                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
853                         if (IS_ERR(flowlabel))
854                                 return -EINVAL;
855                         fl6_sock_release(flowlabel);
856                 }
857         }
858         /*
859          * connect() to INADDR_ANY means loopback (BSD'ism).
860          */
861         if (ipv6_addr_any(&usin->sin6_addr))
862                 usin->sin6_addr.s6_addr[15] = 1;
863
864         addr_type = ipv6_addr_type(&usin->sin6_addr);
865
866         if (addr_type & IPV6_ADDR_MULTICAST)
867                 return -ENETUNREACH;
868
869         if (addr_type & IPV6_ADDR_LINKLOCAL) {
870                 if (addr_len >= sizeof(struct sockaddr_in6) &&
871                     usin->sin6_scope_id) {
872                         /* If interface is set while binding, indices
873                          * must coincide.
874                          */
875                         if (sk->sk_bound_dev_if &&
876                             sk->sk_bound_dev_if != usin->sin6_scope_id)
877                                 return -EINVAL;
878
879                         sk->sk_bound_dev_if = usin->sin6_scope_id;
880                 }
881
882                 /* Connect to link-local address requires an interface */
883                 if (!sk->sk_bound_dev_if)
884                         return -EINVAL;
885         }
886
887         sk->sk_v6_daddr = usin->sin6_addr;
888         np->flow_label = fl6.flowlabel;
889
890         /*
891          * DCCP over IPv4
892          */
893         if (addr_type == IPV6_ADDR_MAPPED) {
894                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
895                 struct sockaddr_in sin;
896
897                 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
898
899                 if (ipv6_only_sock(sk))
900                         return -ENETUNREACH;
901
902                 sin.sin_family = AF_INET;
903                 sin.sin_port = usin->sin6_port;
904                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
905
906                 icsk->icsk_af_ops = &dccp_ipv6_mapped;
907                 sk->sk_backlog_rcv = dccp_v4_do_rcv;
908
909                 err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
910                 if (err) {
911                         icsk->icsk_ext_hdr_len = exthdrlen;
912                         icsk->icsk_af_ops = &dccp_ipv6_af_ops;
913                         sk->sk_backlog_rcv = dccp_v6_do_rcv;
914                         goto failure;
915                 }
916                 np->saddr = sk->sk_v6_rcv_saddr;
917                 return err;
918         }
919
920         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
921                 saddr = &sk->sk_v6_rcv_saddr;
922
923         fl6.flowi6_proto = IPPROTO_DCCP;
924         fl6.daddr = sk->sk_v6_daddr;
925         fl6.saddr = saddr ? *saddr : np->saddr;
926         fl6.flowi6_oif = sk->sk_bound_dev_if;
927         fl6.fl6_dport = usin->sin6_port;
928         fl6.fl6_sport = inet->inet_sport;
929         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
930
931         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
932         final_p = fl6_update_dst(&fl6, opt, &final);
933
934         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
935         if (IS_ERR(dst)) {
936                 err = PTR_ERR(dst);
937                 goto failure;
938         }
939
940         if (saddr == NULL) {
941                 saddr = &fl6.saddr;
942
943                 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
944                 if (err)
945                         goto failure;
946         }
947
948         /* set the source address */
949         np->saddr = *saddr;
950         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
951
952         ip6_dst_store(sk, dst, NULL, NULL);
953
954         icsk->icsk_ext_hdr_len = 0;
955         if (opt)
956                 icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
957
958         inet->inet_dport = usin->sin6_port;
959
960         dccp_set_state(sk, DCCP_REQUESTING);
961         err = inet6_hash_connect(&dccp_death_row, sk);
962         if (err)
963                 goto late_failure;
964
965         dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
966                                                       sk->sk_v6_daddr.s6_addr32,
967                                                       inet->inet_sport,
968                                                       inet->inet_dport);
969         err = dccp_connect(sk);
970         if (err)
971                 goto late_failure;
972
973         return 0;
974
975 late_failure:
976         dccp_set_state(sk, DCCP_CLOSED);
977         inet_bhash2_reset_saddr(sk);
978         __sk_dst_reset(sk);
979 failure:
980         inet->inet_dport = 0;
981         sk->sk_route_caps = 0;
982         return err;
983 }
984
985 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
986         .queue_xmit        = inet6_csk_xmit,
987         .send_check        = dccp_v6_send_check,
988         .rebuild_header    = inet6_sk_rebuild_header,
989         .conn_request      = dccp_v6_conn_request,
990         .syn_recv_sock     = dccp_v6_request_recv_sock,
991         .net_header_len    = sizeof(struct ipv6hdr),
992         .setsockopt        = ipv6_setsockopt,
993         .getsockopt        = ipv6_getsockopt,
994         .addr2sockaddr     = inet6_csk_addr2sockaddr,
995         .sockaddr_len      = sizeof(struct sockaddr_in6),
996 };
997
998 /*
999  *      DCCP over IPv4 via INET6 API
1000  */
1001 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
1002         .queue_xmit        = ip_queue_xmit,
1003         .send_check        = dccp_v4_send_check,
1004         .rebuild_header    = inet_sk_rebuild_header,
1005         .conn_request      = dccp_v6_conn_request,
1006         .syn_recv_sock     = dccp_v6_request_recv_sock,
1007         .net_header_len    = sizeof(struct iphdr),
1008         .setsockopt        = ipv6_setsockopt,
1009         .getsockopt        = ipv6_getsockopt,
1010         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1011         .sockaddr_len      = sizeof(struct sockaddr_in6),
1012 };
1013
/*
 * sk_destruct callback of DCCPv6 sockets (set in dccp_v6_init_sock()):
 * run the DCCP-common destructor first, then the generic IPv6 one.
 */
static void dccp_v6_sk_destruct(struct sock *sk)
{
	dccp_destruct_common(sk);
	inet6_sock_destruct(sk);
}
1019
1020 /* NOTE: A lot of things are set to zero explicitly by the call to
1021  *       sk_alloc(), so they need not be done here.
1022  */
1023 static int dccp_v6_init_sock(struct sock *sk)
1024 {
1025         static __u8 dccp_v6_ctl_sock_initialized;
1026         int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
1027
1028         if (err == 0) {
1029                 if (unlikely(!dccp_v6_ctl_sock_initialized))
1030                         dccp_v6_ctl_sock_initialized = 1;
1031                 inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
1032                 sk->sk_destruct = dccp_v6_sk_destruct;
1033         }
1034
1035         return err;
1036 }
1037
1038 static struct timewait_sock_ops dccp6_timewait_sock_ops = {
1039         .twsk_obj_size  = sizeof(struct dccp6_timewait_sock),
1040 };
1041
1042 static struct proto dccp_v6_prot = {
1043         .name              = "DCCPv6",
1044         .owner             = THIS_MODULE,
1045         .close             = dccp_close,
1046         .connect           = dccp_v6_connect,
1047         .disconnect        = dccp_disconnect,
1048         .ioctl             = dccp_ioctl,
1049         .init              = dccp_v6_init_sock,
1050         .setsockopt        = dccp_setsockopt,
1051         .getsockopt        = dccp_getsockopt,
1052         .sendmsg           = dccp_sendmsg,
1053         .recvmsg           = dccp_recvmsg,
1054         .backlog_rcv       = dccp_v6_do_rcv,
1055         .hash              = inet6_hash,
1056         .unhash            = inet_unhash,
1057         .accept            = inet_csk_accept,
1058         .get_port          = inet_csk_get_port,
1059         .shutdown          = dccp_shutdown,
1060         .destroy           = dccp_destroy_sock,
1061         .orphan_count      = &dccp_orphan_count,
1062         .max_header        = MAX_DCCP_HEADER,
1063         .obj_size          = sizeof(struct dccp6_sock),
1064         .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
1065         .slab_flags        = SLAB_TYPESAFE_BY_RCU,
1066         .rsk_prot          = &dccp6_request_sock_ops,
1067         .twsk_prot         = &dccp6_timewait_sock_ops,
1068         .h.hashinfo        = &dccp_hashinfo,
1069 };
1070
1071 static const struct inet6_protocol dccp_v6_protocol = {
1072         .handler        = dccp_v6_rcv,
1073         .err_handler    = dccp_v6_err,
1074         .flags          = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
1075 };
1076
1077 static const struct proto_ops inet6_dccp_ops = {
1078         .family            = PF_INET6,
1079         .owner             = THIS_MODULE,
1080         .release           = inet6_release,
1081         .bind              = inet6_bind,
1082         .connect           = inet_stream_connect,
1083         .socketpair        = sock_no_socketpair,
1084         .accept            = inet_accept,
1085         .getname           = inet6_getname,
1086         .poll              = dccp_poll,
1087         .ioctl             = inet6_ioctl,
1088         .gettstamp         = sock_gettstamp,
1089         .listen            = inet_dccp_listen,
1090         .shutdown          = inet_shutdown,
1091         .setsockopt        = sock_common_setsockopt,
1092         .getsockopt        = sock_common_getsockopt,
1093         .sendmsg           = inet_sendmsg,
1094         .recvmsg           = sock_common_recvmsg,
1095         .mmap              = sock_no_mmap,
1096 #ifdef CONFIG_COMPAT
1097         .compat_ioctl      = inet6_compat_ioctl,
1098 #endif
1099 };
1100
1101 static struct inet_protosw dccp_v6_protosw = {
1102         .type           = SOCK_DCCP,
1103         .protocol       = IPPROTO_DCCP,
1104         .prot           = &dccp_v6_prot,
1105         .ops            = &inet6_dccp_ops,
1106         .flags          = INET_PROTOSW_ICSK,
1107 };
1108
1109 static int __net_init dccp_v6_init_net(struct net *net)
1110 {
1111         struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id);
1112
1113         if (dccp_hashinfo.bhash == NULL)
1114                 return -ESOCKTNOSUPPORT;
1115
1116         return inet_ctl_sock_create(&pn->v6_ctl_sk, PF_INET6,
1117                                     SOCK_DCCP, IPPROTO_DCCP, net);
1118 }
1119
1120 static void __net_exit dccp_v6_exit_net(struct net *net)
1121 {
1122         struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id);
1123
1124         inet_ctl_sock_destroy(pn->v6_ctl_sk);
1125 }
1126
1127 static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
1128 {
1129         inet_twsk_purge(&dccp_hashinfo, AF_INET6);
1130 }
1131
1132 static struct pernet_operations dccp_v6_ops = {
1133         .init   = dccp_v6_init_net,
1134         .exit   = dccp_v6_exit_net,
1135         .exit_batch = dccp_v6_exit_batch,
1136         .id     = &dccp_v6_pernet_id,
1137         .size   = sizeof(struct dccp_v6_pernet),
1138 };
1139
1140 static int __init dccp_v6_init(void)
1141 {
1142         int err = proto_register(&dccp_v6_prot, 1);
1143
1144         if (err)
1145                 goto out;
1146
1147         inet6_register_protosw(&dccp_v6_protosw);
1148
1149         err = register_pernet_subsys(&dccp_v6_ops);
1150         if (err)
1151                 goto out_destroy_ctl_sock;
1152
1153         err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1154         if (err)
1155                 goto out_unregister_proto;
1156
1157 out:
1158         return err;
1159 out_unregister_proto:
1160         unregister_pernet_subsys(&dccp_v6_ops);
1161 out_destroy_ctl_sock:
1162         inet6_unregister_protosw(&dccp_v6_protosw);
1163         proto_unregister(&dccp_v6_prot);
1164         goto out;
1165 }
1166
1167 static void __exit dccp_v6_exit(void)
1168 {
1169         inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1170         unregister_pernet_subsys(&dccp_v6_ops);
1171         inet6_unregister_protosw(&dccp_v6_protosw);
1172         proto_unregister(&dccp_v6_prot);
1173 }
1174
1175 module_init(dccp_v6_init);
1176 module_exit(dccp_v6_exit);
1177
1178 /*
1179  * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
1180  * values directly. Also cover the case where the protocol is not specified,
1181  * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
1182  */
1183 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6);
1184 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6);
1185 MODULE_LICENSE("GPL");
1186 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
1187 MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");