// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
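/* Without CONFIG_TCP_MD5SIG, provide a stub lookup that never finds a
 * key, so callers can test the result without sprinkling #ifdefs.
 */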
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
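
/* The offset arithmetic above relies on the layout of struct tcp6_sock
 * (include/linux/ipv6.h), where the ipv6_pinfo is the last member:
 *
 *      struct tcp6_sock {
 *              struct tcp_sock   tcp;
 *              struct ipv6_pinfo inet6;
 *      };
 */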

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

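/* Initial sequence numbers and timestamp offsets are derived from the
 * connection 4-tuple keyed with a boot-time secret (in the spirit of
 * RFC 6528), so off-path attackers cannot predict them.
 */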
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * out of the bounds specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

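/* Active open.  Validate the destination address, divert v4-mapped
 * destinations to the IPv4 connect path, route the flow, pick a source
 * port via inet6_hash_connect(), then send the SYN with tcp_connect().
 */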
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

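/* Handler for ICMPV6_PKT_TOOBIG: shrink the cached MSS to match the new
 * path MTU and retransmit whatever no longer fits.  Called directly from
 * tcp_v6_err(), or deferred via the TCP_MTU_REDUCED_DEFERRED flag when
 * the socket was owned by the user at the time the ICMP arrived.
 */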
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check if this ICMP message allows revert of backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

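/* Transmit a SYN-ACK for the given request: grab a route if the caller
 * did not supply one, reflect traffic class and flow label as configured,
 * and send the segment via ip6_xmit() under rcu_read_lock(), since the
 * socket's IPv6 options may be changed concurrently.
 */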
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

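/* Feed the MD5 digest the material RFC 2385 requires first: an IPv6
 * pseudo-header (addresses, segment length, protocol) followed by the
 * TCP header with its checksum field zeroed.  Callers then hash the
 * payload and the key itself.
 */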
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

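/* Decide whether an incoming segment must be dropped for MD5 reasons.
 * Four cases: no key and no MD5 option (accept), key but no option
 * (drop), option but no key (drop), both present (verify the signature).
 */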
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* sdif set, means packet ingressed via a device
         * in an L3 domain and dif is set to the l3mdev
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

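/* Route an incoming connection request: initialize the request sock from
 * the SYN, give security modules a chance to veto it, then look up the
 * route for the SYN-ACK.
 */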
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

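/* Build and transmit a bare ACK or RST in reply to @skb.  This works
 * without a full socket: the reply is assembled by swapping the incoming
 * segment's addresses and ports, and it is sent through the per-netns
 * TCP control socket.
 */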
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;
        buff->csum = 0;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow() whether the reply is an RST
         * or not; the underlying function will use it to retrieve the
         * network namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the md5 hash of the
                 * found key; no RST is generated if the md5 hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

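/* Entry point for incoming SYNs.  v4-mapped traffic is handed to the
 * IPv4 code; non-unicast destinations and packets with a v4-mapped
 * source (invalid on the wire) are dropped before the generic
 * tcp_conn_request() takes over.
 */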
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

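/* Create the full child socket once the handshake completes.  For
 * v4-mapped connections the IPv4 code creates the socket and the af_ops
 * are switched to the mapped variants; for native IPv6 the child is
 * cloned from the listener and the saved IPv6 options, MD5 key and
 * route are carried over.
 */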
1236 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1237                                          struct request_sock *req,
1238                                          struct dst_entry *dst,
1239                                          struct request_sock *req_unhash,
1240                                          bool *own_req)
1241 {
1242         struct inet_request_sock *ireq;
1243         struct ipv6_pinfo *newnp;
1244         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1245         struct ipv6_txoptions *opt;
1246         struct inet_sock *newinet;
1247         bool found_dup_sk = false;
1248         struct tcp_sock *newtp;
1249         struct sock *newsk;
1250 #ifdef CONFIG_TCP_MD5SIG
1251         struct tcp_md5sig_key *key;
1252         int l3index;
1253 #endif
1254         struct flowi6 fl6;
1255
1256         if (skb->protocol == htons(ETH_P_IP)) {
1257                 /*
1258                  *      v6 mapped
1259                  */
1260
1261                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1262                                              req_unhash, own_req);
1263
1264                 if (!newsk)
1265                         return NULL;
1266
1267                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1268
1269                 newinet = inet_sk(newsk);
1270                 newnp = tcp_inet6_sk(newsk);
1271                 newtp = tcp_sk(newsk);
1272
1273                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1274
1275                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1276
1277                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1278                 if (sk_is_mptcp(newsk))
1279                         mptcpv6_handle_mapped(newsk, true);
1280                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1281 #ifdef CONFIG_TCP_MD5SIG
1282                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1283 #endif
1284
1285                 newnp->ipv6_mc_list = NULL;
1286                 newnp->ipv6_ac_list = NULL;
1287                 newnp->ipv6_fl_list = NULL;
1288                 newnp->pktoptions  = NULL;
1289                 newnp->opt         = NULL;
1290                 newnp->mcast_oif   = inet_iif(skb);
1291                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1292                 newnp->rcv_flowinfo = 0;
1293                 if (np->repflow)
1294                         newnp->flow_label = 0;
1295
1296                 /*
1297                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1298                  * here, tcp_create_openreq_child now does this for us, see the comment in
1299                  * that function for the gory details. -acme
1300                  */
1301
1302                 /* It is tricky place. Until this moment IPv4 tcp
1303                    worked with IPv6 icsk.icsk_af_ops.
1304                    Sync it now.
1305                  */
1306                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1307
1308                 return newsk;
1309         }
1310
1311         ireq = inet_rsk(req);
1312
1313         if (sk_acceptq_is_full(sk))
1314                 goto out_overflow;
1315
1316         if (!dst) {
1317                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1318                 if (!dst)
1319                         goto out;
1320         }
1321
1322         newsk = tcp_create_openreq_child(sk, req, skb);
1323         if (!newsk)
1324                 goto out_nonewsk;
1325
1326         /*
1327          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1328          * count here, tcp_create_openreq_child now does this for us, see the
1329          * comment in that function for the gory details. -acme
1330          */
1331
1332         newsk->sk_gso_type = SKB_GSO_TCPV6;
1333         ip6_dst_store(newsk, dst, NULL, NULL);
1334         inet6_sk_rx_dst_set(newsk, skb);
1335
1336         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1337
1338         newtp = tcp_sk(newsk);
1339         newinet = inet_sk(newsk);
1340         newnp = tcp_inet6_sk(newsk);
1341
1342         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1343
1344         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1345         newnp->saddr = ireq->ir_v6_loc_addr;
1346         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1347         newsk->sk_bound_dev_if = ireq->ir_iif;
1348
1349         /* Now IPv6 options...
1350
1351            First: no IPv4 options.
1352          */
1353         newinet->inet_opt = NULL;
1354         newnp->ipv6_mc_list = NULL;
1355         newnp->ipv6_ac_list = NULL;
1356         newnp->ipv6_fl_list = NULL;
1357
1358         /* Clone RX bits */
1359         newnp->rxopt.all = np->rxopt.all;
1360
1361         newnp->pktoptions = NULL;
1362         newnp->opt        = NULL;
1363         newnp->mcast_oif  = tcp_v6_iif(skb);
1364         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1365         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1366         if (np->repflow)
1367                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1368
1369         /* Set ToS of the new socket based upon the value of incoming SYN.
1370          * ECT bits are set later in tcp_init_transfer().
1371          */
1372         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1373                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1374
1375         /* Clone native IPv6 options from listening socket (if any)
1376
1377            Yes, keeping reference count would be much more clever,
1378            but we make one more one thing there: reattach optmem
1379            to newsk.
1380          */
1381         opt = ireq->ipv6_opt;
1382         if (!opt)
1383                 opt = rcu_dereference(np->opt);
1384         if (opt) {
1385                 opt = ipv6_dup_options(newsk, opt);
1386                 RCU_INIT_POINTER(newnp->opt, opt);
1387         }
1388         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1389         if (opt)
1390                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1391                                                     opt->opt_flen;
1392
1393         tcp_ca_openreq_child(newsk, dst);
1394
1395         tcp_sync_mss(newsk, dst_mtu(dst));
1396         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1397
1398         tcp_initialize_rcv_mss(newsk);
1399
1400         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1401         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1402
1403 #ifdef CONFIG_TCP_MD5SIG
1404         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1405
1406         /* Copy over the MD5 key from the original socket */
1407         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1408         if (key) {
1409                 /* We're using one, so create a matching key
1410                  * on the newsk structure. If we fail to get
1411                  * memory, then we end up not copying the key
1412                  * across. Shucks.
1413                  */
1414                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1415                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1416                                sk_gfp_mask(sk, GFP_ATOMIC));
1417         }
1418 #endif
1419
1420         if (__inet_inherit_port(sk, newsk) < 0) {
1421                 inet_csk_prepare_forced_close(newsk);
1422                 tcp_done(newsk);
1423                 goto out;
1424         }
1425         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1426                                        &found_dup_sk);
1427         if (*own_req) {
1428                 tcp_move_syn(newtp, req);
1429
1430                 /* Clone pktoptions received with SYN, if we own the req */
1431                 if (ireq->pktopts) {
1432                         newnp->pktoptions = skb_clone(ireq->pktopts,
1433                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1434                         consume_skb(ireq->pktopts);
1435                         ireq->pktopts = NULL;
1436                         if (newnp->pktoptions) {
1437                                 tcp_v6_restore_cb(newnp->pktoptions);
1438                                 skb_set_owner_r(newnp->pktoptions, newsk);
1439                         }
1440                 }
1441         } else {
1442                 if (!req_unhash && found_dup_sk) {
1443                         /* This code path should only be executed in the
1444                          * syncookie case only
1445                          */
1446                         bh_unlock_sock(newsk);
1447                         sock_put(newsk);
1448                         newsk = NULL;
1449                 }
1450         }
1451
1452         return newsk;
1453
1454 out_overflow:
1455         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1456 out_nonewsk:
1457         dst_release(dst);
1458 out:
1459         tcp_listendrop(sk);
1460         return NULL;
1461 }
1462
1463 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1464                                                            u32));
1465 /* The socket must have its spinlock held when we get
1466  * here, unless it is a TCP_LISTEN socket.
1467  *
1468  * We have a potential double-lock case here, so even when
1469  * doing backlog processing we use the BH locking scheme.
1470  * This is because we cannot sleep with the original spinlock
1471  * held.
1472  */
1473 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1474 {
1475         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1476         struct sk_buff *opt_skb = NULL;
1477         struct tcp_sock *tp;
1478
1479         /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1480            goes to the IPv4 receive handler and is backlogged.
1481            From the backlog it always goes here. Kerboom...
1482            Fortunately, tcp_rcv_established and rcv_established
1483            handle them correctly, but it is not the case with
1484            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1485          */
1486
1487         if (skb->protocol == htons(ETH_P_IP))
1488                 return tcp_v4_do_rcv(sk, skb);
1489
1490         /*
1491          *      socket locking is here for SMP purposes as backlog rcv
1492          *      is currently called with bh processing disabled.
1493          */
1494
1495         /* Do Stevens' IPV6_PKTOPTIONS.
1496
1497            Yes, it is the only place in our code where we
1498            can handle it without affecting IPv4.
1499            The rest of the code is protocol independent,
1500            and I do not like the idea of uglifying IPv4.
1501
1502            Actually, the whole idea behind IPV6_PKTOPTIONS
1503            does not look very well thought out. For now we latch
1504            the options received in the last packet enqueued
1505            by tcp. Feel free to propose a better solution.
1506                                                --ANK (980728)
1507          */
1508         if (np->rxopt.all)
1509                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1510
1511         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1512                 struct dst_entry *dst;
1513
1514                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1515                                                 lockdep_sock_is_held(sk));
1516
1517                 sock_rps_save_rxhash(sk, skb);
1518                 sk_mark_napi_id(sk, skb);
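                     /* Revalidate the cached rx dst: drop it when the
                      * segment arrived on a different interface or the
                      * route cookie has gone stale; the next packet will
                      * repopulate sk->sk_rx_dst.
                      */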
1519                 if (dst) {
1520                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1521                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1522                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1523                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1524                                 dst_release(dst);
1525                         }
1526                 }
1527
1528                 tcp_rcv_established(sk, skb);
1529                 if (opt_skb)
1530                         goto ipv6_pktoptions;
1531                 return 0;
1532         }
1533
1534         if (tcp_checksum_complete(skb))
1535                 goto csum_err;
1536
1537         if (sk->sk_state == TCP_LISTEN) {
1538                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1539
1540                 if (!nsk)
1541                         goto discard;
1542
1543                 if (nsk != sk) {
1544                         if (tcp_child_process(sk, nsk, skb))
1545                                 goto reset;
1546                         if (opt_skb)
1547                                 __kfree_skb(opt_skb);
1548                         return 0;
1549                 }
1550         } else
1551                 sock_rps_save_rxhash(sk, skb);
1552
1553         if (tcp_rcv_state_process(sk, skb))
1554                 goto reset;
1555         if (opt_skb)
1556                 goto ipv6_pktoptions;
1557         return 0;
1558
1559 reset:
1560         tcp_v6_send_reset(sk, skb);
1561 discard:
1562         if (opt_skb)
1563                 __kfree_skb(opt_skb);
1564         kfree_skb(skb);
1565         return 0;
1566 csum_err:
1567         trace_tcp_bad_csum(skb);
1568         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1569         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1570         goto discard;
1571
1572
1573 ipv6_pktoptions:
1574         /* You may ask, what is this? Options are latched only when:
1575
1576            1. skb was enqueued by tcp.
1577            2. skb is added to the tail of the read queue, rather than out of order.
1578            3. the socket is not in a passive state.
1579            4. Finally, it really contains options, which the user wants to receive.
1580          */
1581         tp = tcp_sk(sk);
1582         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1583             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1584                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1585                         np->mcast_oif = tcp_v6_iif(opt_skb);
1586                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1587                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1588                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1589                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1590                 if (np->repflow)
1591                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1592                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1593                         skb_set_owner_r(opt_skb, sk);
1594                         tcp_v6_restore_cb(opt_skb);
1595                         opt_skb = xchg(&np->pktoptions, opt_skb);
1596                 } else {
1597                         __kfree_skb(opt_skb);
1598                         opt_skb = xchg(&np->pktoptions, NULL);
1599                 }
1600         }
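             /* Both branches above swap the new (possibly NULL) skb into
              * np->pktoptions with xchg(); opt_skb then holds whatever
              * was latched before, and is released just below.
              */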
1601
1602         kfree_skb(opt_skb);
1603         return 0;
1604 }
1605
1606 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1607                            const struct tcphdr *th)
1608 {
1609         /* This is tricky: we move IP6CB to its correct location inside
1610          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1611          * _decode_session6() uses IP6CB().
1612          * barrier() makes sure the compiler won't play aliasing games.
1613          */
1614         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1615                 sizeof(struct inet6_skb_parm));
1616         barrier();
1617
1618         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1619         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1620                                     skb->len - th->doff*4);
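             /* For example, a bare SYN (syn == 1, fin == 0, no payload)
              * gives end_seq == seq + 1: SYN and FIN each consume one
              * sequence number, payload bytes add skb->len - th->doff * 4.
              */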
1621         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1622         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1623         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1624         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1625         TCP_SKB_CB(skb)->sacked = 0;
1626         TCP_SKB_CB(skb)->has_rxtstamp =
1627                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1628 }
1629
1630 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1631 {
1632         struct sk_buff *skb_to_free;
1633         int sdif = inet6_sdif(skb);
1634         int dif = inet6_iif(skb);
1635         const struct tcphdr *th;
1636         const struct ipv6hdr *hdr;
1637         bool refcounted;
1638         struct sock *sk;
1639         int ret;
1640         struct net *net = dev_net(skb->dev);
1641
1642         if (skb->pkt_type != PACKET_HOST)
1643                 goto discard_it;
1644
1645         /*
1646          *      Count it even if it's bad.
1647          */
1648         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1649
1650         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1651                 goto discard_it;
1652
1653         th = (const struct tcphdr *)skb->data;
1654
1655         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1656                 goto bad_packet;
1657         if (!pskb_may_pull(skb, th->doff*4))
1658                 goto discard_it;
1659
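             /* skb_checksum_init() folds in the IPv6 pseudo-header:
              * segments already verified by hardware offload pass
              * cheaply, the rest are left for tcp_checksum_complete()
              * to verify in full later.
              */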
1660         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1661                 goto csum_error;
1662
1663         th = (const struct tcphdr *)skb->data;
1664         hdr = ipv6_hdr(skb);
1665
1666 lookup:
1667         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1668                                 th->source, th->dest, inet6_iif(skb), sdif,
1669                                 &refcounted);
1670         if (!sk)
1671                 goto no_tcp_socket;
1672
1673 process:
1674         if (sk->sk_state == TCP_TIME_WAIT)
1675                 goto do_time_wait;
1676
1677         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1678                 struct request_sock *req = inet_reqsk(sk);
1679                 bool req_stolen = false;
1680                 struct sock *nsk;
1681
1682                 sk = req->rsk_listener;
1683                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1684                         sk_drops_add(sk, skb);
1685                         reqsk_put(req);
1686                         goto discard_it;
1687                 }
1688                 if (tcp_checksum_complete(skb)) {
1689                         reqsk_put(req);
1690                         goto csum_error;
1691                 }
1692                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1693                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1694                         if (!nsk) {
1695                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1696                                 goto lookup;
1697                         }
1698                         sk = nsk;
1699                         /* reuseport_migrate_sock() has already taken one
1700                          * sk_refcnt reference before returning.
1701                          */
1702                 } else {
1703                         sock_hold(sk);
1704                 }
1705                 refcounted = true;
1706                 nsk = NULL;
1707                 if (!tcp_filter(sk, skb)) {
1708                         th = (const struct tcphdr *)skb->data;
1709                         hdr = ipv6_hdr(skb);
1710                         tcp_v6_fill_cb(skb, hdr, th);
1711                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1712                 }
1713                 if (!nsk) {
1714                         reqsk_put(req);
1715                         if (req_stolen) {
1716                                 /* Another CPU got exclusive access to req
1717                                  * and created a full-blown socket.
1718                                  * Try to feed this packet to this socket
1719                                  * instead of discarding it.
1720                                  */
1721                                 tcp_v6_restore_cb(skb);
1722                                 sock_put(sk);
1723                                 goto lookup;
1724                         }
1725                         goto discard_and_relse;
1726                 }
1727                 if (nsk == sk) {
1728                         reqsk_put(req);
1729                         tcp_v6_restore_cb(skb);
1730                 } else if (tcp_child_process(sk, nsk, skb)) {
1731                         tcp_v6_send_reset(nsk, skb);
1732                         goto discard_and_relse;
1733                 } else {
1734                         sock_put(sk);
1735                         return 0;
1736                 }
1737         }
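             /* GTSM-style defense (RFC 5082): if the application set
              * IPV6_MINHOPCOUNT on this socket, drop segments whose
              * remaining hop limit is below the configured minimum.
              */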
1738         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1739                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1740                 goto discard_and_relse;
1741         }
1742
1743         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1744                 goto discard_and_relse;
1745
1746         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1747                 goto discard_and_relse;
1748
1749         if (tcp_filter(sk, skb))
1750                 goto discard_and_relse;
1751         th = (const struct tcphdr *)skb->data;
1752         hdr = ipv6_hdr(skb);
1753         tcp_v6_fill_cb(skb, hdr, th);
1754
1755         skb->dev = NULL;
1756
1757         if (sk->sk_state == TCP_LISTEN) {
1758                 ret = tcp_v6_do_rcv(sk, skb);
1759                 goto put_and_return;
1760         }
1761
1762         sk_incoming_cpu_update(sk);
1763
1764         bh_lock_sock_nested(sk);
1765         tcp_segs_in(tcp_sk(sk), skb);
1766         ret = 0;
1767         if (!sock_owned_by_user(sk)) {
1768                 skb_to_free = sk->sk_rx_skb_cache;
1769                 sk->sk_rx_skb_cache = NULL;
1770                 ret = tcp_v6_do_rcv(sk, skb);
1771         } else {
1772                 if (tcp_add_backlog(sk, skb))
1773                         goto discard_and_relse;
1774                 skb_to_free = NULL;
1775         }
1776         bh_unlock_sock(sk);
1777         if (skb_to_free)
1778                 __kfree_skb(skb_to_free);
1779 put_and_return:
1780         if (refcounted)
1781                 sock_put(sk);
1782         return ret ? -1 : 0;
1783
1784 no_tcp_socket:
1785         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1786                 goto discard_it;
1787
1788         tcp_v6_fill_cb(skb, hdr, th);
1789
1790         if (tcp_checksum_complete(skb)) {
1791 csum_error:
1792                 trace_tcp_bad_csum(skb);
1793                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1794 bad_packet:
1795                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1796         } else {
1797                 tcp_v6_send_reset(NULL, skb);
1798         }
1799
1800 discard_it:
1801         kfree_skb(skb);
1802         return 0;
1803
1804 discard_and_relse:
1805         sk_drops_add(sk, skb);
1806         if (refcounted)
1807                 sock_put(sk);
1808         goto discard_it;
1809
1810 do_time_wait:
1811         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1812                 inet_twsk_put(inet_twsk(sk));
1813                 goto discard_it;
1814         }
1815
1816         tcp_v6_fill_cb(skb, hdr, th);
1817
1818         if (tcp_checksum_complete(skb)) {
1819                 inet_twsk_put(inet_twsk(sk));
1820                 goto csum_error;
1821         }
1822
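             /* tcp_timewait_state_process() decides the fate of a segment
              * that hit a TIME_WAIT socket: answer with an ACK (TCP_TW_ACK),
              * answer with a RST (TCP_TW_RST), silently drop it
              * (TCP_TW_SUCCESS), or accept it as a SYN for a fresh
              * connection (TCP_TW_SYN).
              */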
1823         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1824         case TCP_TW_SYN:
1825         {
1826                 struct sock *sk2;
1827
1828                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1829                                             skb, __tcp_hdrlen(th),
1830                                             &ipv6_hdr(skb)->saddr, th->source,
1831                                             &ipv6_hdr(skb)->daddr,
1832                                             ntohs(th->dest),
1833                                             tcp_v6_iif_l3_slave(skb),
1834                                             sdif);
1835                 if (sk2) {
1836                         struct inet_timewait_sock *tw = inet_twsk(sk);
1837                         inet_twsk_deschedule_put(tw);
1838                         sk = sk2;
1839                         tcp_v6_restore_cb(skb);
1840                         refcounted = false;
1841                         goto process;
1842                 }
1843         }
1844                 /* to ACK */
1845                 fallthrough;
1846         case TCP_TW_ACK:
1847                 tcp_v6_timewait_ack(sk, skb);
1848                 break;
1849         case TCP_TW_RST:
1850                 tcp_v6_send_reset(sk, skb);
1851                 inet_twsk_deschedule_put(inet_twsk(sk));
1852                 goto discard_it;
1853         case TCP_TW_SUCCESS:
1854                 ;
1855         }
1856         goto discard_it;
1857 }
1858
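     /* Early demux: while the packet is still in the IPv6 receive path,
      * look up an established socket for it so that the socket's cached
      * rx dst can be attached to the skb, sparing a full routing lookup.
      */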
1859 void tcp_v6_early_demux(struct sk_buff *skb)
1860 {
1861         const struct ipv6hdr *hdr;
1862         const struct tcphdr *th;
1863         struct sock *sk;
1864
1865         if (skb->pkt_type != PACKET_HOST)
1866                 return;
1867
1868         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1869                 return;
1870
1871         hdr = ipv6_hdr(skb);
1872         th = tcp_hdr(skb);
1873
1874         if (th->doff < sizeof(struct tcphdr) / 4)
1875                 return;
1876
1877         /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1878         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1879                                         &hdr->saddr, th->source,
1880                                         &hdr->daddr, ntohs(th->dest),
1881                                         inet6_iif(skb), inet6_sdif(skb));
1882         if (sk) {
1883                 skb->sk = sk;
1884                 skb->destructor = sock_edemux;
1885                 if (sk_fullsock(sk)) {
1886                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1887
1888                         if (dst)
1889                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1890                         if (dst &&
1891                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1892                                 skb_dst_set_noref(skb, dst);
1893                 }
1894         }
1895 }
1896
1897 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1898         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1899         .twsk_unique    = tcp_twsk_unique,
1900         .twsk_destructor = tcp_twsk_destructor,
1901 };
1902
1903 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1904 {
1905         struct ipv6_pinfo *np = inet6_sk(sk);
1906
1907         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1908 }
1909
1910 const struct inet_connection_sock_af_ops ipv6_specific = {
1911         .queue_xmit        = inet6_csk_xmit,
1912         .send_check        = tcp_v6_send_check,
1913         .rebuild_header    = inet6_sk_rebuild_header,
1914         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1915         .conn_request      = tcp_v6_conn_request,
1916         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1917         .net_header_len    = sizeof(struct ipv6hdr),
1918         .net_frag_header_len = sizeof(struct frag_hdr),
1919         .setsockopt        = ipv6_setsockopt,
1920         .getsockopt        = ipv6_getsockopt,
1921         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1922         .sockaddr_len      = sizeof(struct sockaddr_in6),
1923         .mtu_reduced       = tcp_v6_mtu_reduced,
1924 };
1925
1926 #ifdef CONFIG_TCP_MD5SIG
1927 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1928         .md5_lookup     =       tcp_v6_md5_lookup,
1929         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1930         .md5_parse      =       tcp_v6_parse_md5_keys,
1931 };
1932 #endif
1933
1934 /*
1935  *      TCP over IPv4 via INET6 API
1936  */
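     /* These ops take over when an AF_INET6 socket ends up speaking
      * plain IPv4, e.g. after a connect() to a v4-mapped address such
      * as ::ffff:192.0.2.1 (address purely illustrative).
      */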
1937 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1938         .queue_xmit        = ip_queue_xmit,
1939         .send_check        = tcp_v4_send_check,
1940         .rebuild_header    = inet_sk_rebuild_header,
1941         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1942         .conn_request      = tcp_v6_conn_request,
1943         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1944         .net_header_len    = sizeof(struct iphdr),
1945         .setsockopt        = ipv6_setsockopt,
1946         .getsockopt        = ipv6_getsockopt,
1947         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1948         .sockaddr_len      = sizeof(struct sockaddr_in6),
1949         .mtu_reduced       = tcp_v4_mtu_reduced,
1950 };
1951
1952 #ifdef CONFIG_TCP_MD5SIG
1953 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1954         .md5_lookup     =       tcp_v4_md5_lookup,
1955         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1956         .md5_parse      =       tcp_v6_parse_md5_keys,
1957 };
1958 #endif
1959
1960 /* NOTE: A lot of things are set to zero explicitly by the call to
1961  *       sk_alloc(), so they need not be done here.
1962  */
1963 static int tcp_v6_init_sock(struct sock *sk)
1964 {
1965         struct inet_connection_sock *icsk = inet_csk(sk);
1966
1967         tcp_init_sock(sk);
1968
1969         icsk->icsk_af_ops = &ipv6_specific;
1970
1971 #ifdef CONFIG_TCP_MD5SIG
1972         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1973 #endif
1974
1975         return 0;
1976 }
1977
1978 static void tcp_v6_destroy_sock(struct sock *sk)
1979 {
1980         tcp_v4_destroy_sock(sk);
1981         inet6_destroy_sock(sk);
1982 }
1983
1984 #ifdef CONFIG_PROC_FS
1985 /* Proc filesystem TCPv6 sock list dumping. */
1986 static void get_openreq6(struct seq_file *seq,
1987                          const struct request_sock *req, int i)
1988 {
1989         long ttd = req->rsk_timer.expires - jiffies;
1990         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1991         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1992
1993         if (ttd < 0)
1994                 ttd = 0;
1995
1996         seq_printf(seq,
1997                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1998                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1999                    i,
2000                    src->s6_addr32[0], src->s6_addr32[1],
2001                    src->s6_addr32[2], src->s6_addr32[3],
2002                    inet_rsk(req)->ir_num,
2003                    dest->s6_addr32[0], dest->s6_addr32[1],
2004                    dest->s6_addr32[2], dest->s6_addr32[3],
2005                    ntohs(inet_rsk(req)->ir_rmt_port),
2006                    TCP_SYN_RECV,
2007                    0, 0, /* could print option size, but that is af dependent. */
2008                    1,   /* timers active (only the expire timer) */
2009                    jiffies_to_clock_t(ttd),
2010                    req->num_timeout,
2011                    from_kuid_munged(seq_user_ns(seq),
2012                                     sock_i_uid(req->rsk_listener)),
2013                    0,  /* non standard timer */
2014                    0, /* open_requests have no inode */
2015                    0, req);
2016 }
2017
2018 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2019 {
2020         const struct in6_addr *dest, *src;
2021         __u16 destp, srcp;
2022         int timer_active;
2023         unsigned long timer_expires;
2024         const struct inet_sock *inet = inet_sk(sp);
2025         const struct tcp_sock *tp = tcp_sk(sp);
2026         const struct inet_connection_sock *icsk = inet_csk(sp);
2027         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2028         int rx_queue;
2029         int state;
2030
2031         dest  = &sp->sk_v6_daddr;
2032         src   = &sp->sk_v6_rcv_saddr;
2033         destp = ntohs(inet->inet_dport);
2034         srcp  = ntohs(inet->inet_sport);
2035
2036         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2037             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2038             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2039                 timer_active    = 1;
2040                 timer_expires   = icsk->icsk_timeout;
2041         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2042                 timer_active    = 4;
2043                 timer_expires   = icsk->icsk_timeout;
2044         } else if (timer_pending(&sp->sk_timer)) {
2045                 timer_active    = 2;
2046                 timer_expires   = sp->sk_timer.expires;
2047         } else {
2048                 timer_active    = 0;
2049                 timer_expires = jiffies;
2050         }
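             /* timer_active feeds the "tr" column of /proc/net/tcp6:
              * 1 retransmit/loss-probe timer, 2 keepalive (sk_timer),
              * 4 zero-window probe timer, 0 nothing pending.
              */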
2051
2052         state = inet_sk_state_load(sp);
2053         if (state == TCP_LISTEN)
2054                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2055         else
2056                 /* Because we don't lock the socket,
2057                  * we might find a transient negative value.
2058                  */
2059                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2060                                       READ_ONCE(tp->copied_seq), 0);
2061
2062         seq_printf(seq,
2063                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2064                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2065                    i,
2066                    src->s6_addr32[0], src->s6_addr32[1],
2067                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2068                    dest->s6_addr32[0], dest->s6_addr32[1],
2069                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2070                    state,
2071                    READ_ONCE(tp->write_seq) - tp->snd_una,
2072                    rx_queue,
2073                    timer_active,
2074                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2075                    icsk->icsk_retransmits,
2076                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2077                    icsk->icsk_probes_out,
2078                    sock_i_ino(sp),
2079                    refcount_read(&sp->sk_refcnt), sp,
2080                    jiffies_to_clock_t(icsk->icsk_rto),
2081                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2082                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2083                    tcp_snd_cwnd(tp),
2084                    state == TCP_LISTEN ?
2085                         fastopenq->max_qlen :
2086                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2087                    );
2088 }
2089
2090 static void get_timewait6_sock(struct seq_file *seq,
2091                                struct inet_timewait_sock *tw, int i)
2092 {
2093         long delta = tw->tw_timer.expires - jiffies;
2094         const struct in6_addr *dest, *src;
2095         __u16 destp, srcp;
2096
2097         dest = &tw->tw_v6_daddr;
2098         src  = &tw->tw_v6_rcv_saddr;
2099         destp = ntohs(tw->tw_dport);
2100         srcp  = ntohs(tw->tw_sport);
2101
2102         seq_printf(seq,
2103                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2104                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2105                    i,
2106                    src->s6_addr32[0], src->s6_addr32[1],
2107                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2108                    dest->s6_addr32[0], dest->s6_addr32[1],
2109                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2110                    tw->tw_substate, 0, 0,
2111                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2112                    refcount_read(&tw->tw_refcnt), tw);
2113 }
2114
2115 static int tcp6_seq_show(struct seq_file *seq, void *v)
2116 {
2117         struct tcp_iter_state *st;
2118         struct sock *sk = v;
2119
2120         if (v == SEQ_START_TOKEN) {
2121                 seq_puts(seq,
2122                          "  sl  "
2123                          "local_address                         "
2124                          "remote_address                        "
2125                          "st tx_queue rx_queue tr tm->when retrnsmt"
2126                          "   uid  timeout inode\n");
2127                 goto out;
2128         }
2129         st = seq->private;
2130
2131         if (sk->sk_state == TCP_TIME_WAIT)
2132                 get_timewait6_sock(seq, v, st->num);
2133         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2134                 get_openreq6(seq, v, st->num);
2135         else
2136                 get_tcp6_sock(seq, v, st->num);
2137 out:
2138         return 0;
2139 }
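     /* A line of /proc/net/tcp6 therefore looks like (illustrative,
      * middle fields elided):
      *
      *   0: 00000000000000000000000000000000:0016 ... 0A ...
      *
      * sl 0, local address :: with port 0x0016 (22, ssh), and state
      * 0x0A, i.e. TCP_LISTEN.
      */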
2140
2141 static const struct seq_operations tcp6_seq_ops = {
2142         .show           = tcp6_seq_show,
2143         .start          = tcp_seq_start,
2144         .next           = tcp_seq_next,
2145         .stop           = tcp_seq_stop,
2146 };
2147
2148 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2149         .family         = AF_INET6,
2150 };
2151
2152 int __net_init tcp6_proc_init(struct net *net)
2153 {
2154         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2155                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2156                 return -ENOMEM;
2157         return 0;
2158 }
2159
2160 void tcp6_proc_exit(struct net *net)
2161 {
2162         remove_proc_entry("tcp6", net->proc_net);
2163 }
2164 #endif
2165
2166 struct proto tcpv6_prot = {
2167         .name                   = "TCPv6",
2168         .owner                  = THIS_MODULE,
2169         .close                  = tcp_close,
2170         .pre_connect            = tcp_v6_pre_connect,
2171         .connect                = tcp_v6_connect,
2172         .disconnect             = tcp_disconnect,
2173         .accept                 = inet_csk_accept,
2174         .ioctl                  = tcp_ioctl,
2175         .init                   = tcp_v6_init_sock,
2176         .destroy                = tcp_v6_destroy_sock,
2177         .shutdown               = tcp_shutdown,
2178         .setsockopt             = tcp_setsockopt,
2179         .getsockopt             = tcp_getsockopt,
2180         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2181         .keepalive              = tcp_set_keepalive,
2182         .recvmsg                = tcp_recvmsg,
2183         .sendmsg                = tcp_sendmsg,
2184         .sendpage               = tcp_sendpage,
2185         .backlog_rcv            = tcp_v6_do_rcv,
2186         .release_cb             = tcp_release_cb,
2187         .hash                   = inet6_hash,
2188         .unhash                 = inet_unhash,
2189         .get_port               = inet_csk_get_port,
2190 #ifdef CONFIG_BPF_SYSCALL
2191         .psock_update_sk_prot   = tcp_bpf_update_proto,
2192 #endif
2193         .enter_memory_pressure  = tcp_enter_memory_pressure,
2194         .leave_memory_pressure  = tcp_leave_memory_pressure,
2195         .stream_memory_free     = tcp_stream_memory_free,
2196         .sockets_allocated      = &tcp_sockets_allocated,
2197         .memory_allocated       = &tcp_memory_allocated,
2198         .memory_pressure        = &tcp_memory_pressure,
2199         .orphan_count           = &tcp_orphan_count,
2200         .sysctl_mem             = sysctl_tcp_mem,
2201         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2202         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2203         .max_header             = MAX_TCP_HEADER,
2204         .obj_size               = sizeof(struct tcp6_sock),
2205         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2206         .twsk_prot              = &tcp6_timewait_sock_ops,
2207         .rsk_prot               = &tcp6_request_sock_ops,
2208         .h.hashinfo             = &tcp_hashinfo,
2209         .no_autobind            = true,
2210         .diag_destroy           = tcp_abort,
2211 };
2212 EXPORT_SYMBOL_GPL(tcpv6_prot);
2213
2214 static const struct inet6_protocol tcpv6_protocol = {
2215         .handler        =       tcp_v6_rcv,
2216         .err_handler    =       tcp_v6_err,
2217         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2218 };
2219
2220 static struct inet_protosw tcpv6_protosw = {
2221         .type           =       SOCK_STREAM,
2222         .protocol       =       IPPROTO_TCP,
2223         .prot           =       &tcpv6_prot,
2224         .ops            =       &inet6_stream_ops,
2225         .flags          =       INET_PROTOSW_PERMANENT |
2226                                 INET_PROTOSW_ICSK,
2227 };
2228
2229 static int __net_init tcpv6_net_init(struct net *net)
2230 {
2231         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2232                                     SOCK_RAW, IPPROTO_TCP, net);
2233 }
2234
2235 static void __net_exit tcpv6_net_exit(struct net *net)
2236 {
2237         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2238 }
2239
2240 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2241 {
2242         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2243 }
2244
2245 static struct pernet_operations tcpv6_net_ops = {
2246         .init       = tcpv6_net_init,
2247         .exit       = tcpv6_net_exit,
2248         .exit_batch = tcpv6_net_exit_batch,
2249 };
2250
2251 int __init tcpv6_init(void)
2252 {
2253         int ret;
2254
2255         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2256         if (ret)
2257                 goto out;
2258
2259         /* register inet6 protocol */
2260         ret = inet6_register_protosw(&tcpv6_protosw);
2261         if (ret)
2262                 goto out_tcpv6_protocol;
2263
2264         ret = register_pernet_subsys(&tcpv6_net_ops);
2265         if (ret)
2266                 goto out_tcpv6_protosw;
2267
2268         ret = mptcpv6_init();
2269         if (ret)
2270                 goto out_tcpv6_pernet_subsys;
2271
2272 out:
2273         return ret;
2274
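     /* Error unwinding: each label undoes one registration and falls
      * through to the next, reversing the order performed above.
      */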
2275 out_tcpv6_pernet_subsys:
2276         unregister_pernet_subsys(&tcpv6_net_ops);
2277 out_tcpv6_protosw:
2278         inet6_unregister_protosw(&tcpv6_protosw);
2279 out_tcpv6_protocol:
2280         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2281         goto out;
2282 }
2283
2284 void tcpv6_exit(void)
2285 {
2286         unregister_pernet_subsys(&tcpv6_net_ops);
2287         inet6_unregister_protosw(&tcpv6_protosw);
2288         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2289 }