net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
18  *                                      to a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
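/* Illustrative note, with the layout assumed from the struct definitions
 * (not shown in this file): struct tcp6_sock places struct ipv6_pinfo as
 * its last member, so the offset computed above is a compile-time
 * constant:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;	// must remain the last member
 *	};
 */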
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
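/* Note: the helper above caches the validated input route on the socket
 * so the established fast path can skip a route lookup per packet;
 * sk_rx_dst_cookie lets tcp_v6_do_rcv() detect a stale IPv6 route via
 * dst->ops->check() before reusing the cached dst.
 */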
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
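/* The two helpers above derive the initial sequence number and the
 * per-connection timestamp offset from keyed hashes of the connection
 * addresses (plus the ports, for the sequence number), in the spirit of
 * RFC 6528, so off-path attackers cannot predict either value.
 */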
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent the BPF program called below from accessing bytes that are
135          * out of the bounds specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_connection_sock *icsk = inet_csk(sk);
150         struct in6_addr *saddr = NULL, *final_p, final;
151         struct inet_timewait_death_row *tcp_death_row;
152         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153         struct inet_sock *inet = inet_sk(sk);
154         struct tcp_sock *tp = tcp_sk(sk);
155         struct net *net = sock_net(sk);
156         struct ipv6_txoptions *opt;
157         struct dst_entry *dst;
158         struct flowi6 fl6;
159         int addr_type;
160         int err;
161
162         if (addr_len < SIN6_LEN_RFC2133)
163                 return -EINVAL;
164
165         if (usin->sin6_family != AF_INET6)
166                 return -EAFNOSUPPORT;
167
168         memset(&fl6, 0, sizeof(fl6));
169
170         if (np->sndflow) {
171                 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
172                 IP6_ECN_flow_init(fl6.flowlabel);
173                 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
174                         struct ip6_flowlabel *flowlabel;
175                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
176                         if (IS_ERR(flowlabel))
177                                 return -EINVAL;
178                         fl6_sock_release(flowlabel);
179                 }
180         }
181
182         /*
183          *      connect() to INADDR_ANY means loopback (BSD'ism).
184          */
185
186         if (ipv6_addr_any(&usin->sin6_addr)) {
187                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
188                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189                                                &usin->sin6_addr);
190                 else
191                         usin->sin6_addr = in6addr_loopback;
192         }
193
194         addr_type = ipv6_addr_type(&usin->sin6_addr);
195
196         if (addr_type & IPV6_ADDR_MULTICAST)
197                 return -ENETUNREACH;
198
199         if (addr_type & IPV6_ADDR_LINKLOCAL) {
200                 if (addr_len >= sizeof(struct sockaddr_in6) &&
201                     usin->sin6_scope_id) {
202                         /* If an interface was set while binding, the
203                          * indices must coincide.
204                          */
205                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206                                 return -EINVAL;
207
208                         sk->sk_bound_dev_if = usin->sin6_scope_id;
209                 }
210
211                 /* Connecting to a link-local address requires an interface */
212                 if (!sk->sk_bound_dev_if)
213                         return -EINVAL;
214         }
215
216         if (tp->rx_opt.ts_recent_stamp &&
217             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
218                 tp->rx_opt.ts_recent = 0;
219                 tp->rx_opt.ts_recent_stamp = 0;
220                 WRITE_ONCE(tp->write_seq, 0);
221         }
222
223         sk->sk_v6_daddr = usin->sin6_addr;
224         np->flow_label = fl6.flowlabel;
225
226         /*
227          *      TCP over IPv4
228          */
229
230         if (addr_type & IPV6_ADDR_MAPPED) {
231                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
232                 struct sockaddr_in sin;
233
234                 if (ipv6_only_sock(sk))
235                         return -ENETUNREACH;
236
237                 sin.sin_family = AF_INET;
238                 sin.sin_port = usin->sin6_port;
239                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240
241                 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
242                 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
243                 if (sk_is_mptcp(sk))
244                         mptcpv6_handle_mapped(sk, true);
245                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
246 #ifdef CONFIG_TCP_MD5SIG
247                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 #endif
249
250                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
251
252                 if (err) {
253                         icsk->icsk_ext_hdr_len = exthdrlen;
254                         /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
255                         WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
256                         if (sk_is_mptcp(sk))
257                                 mptcpv6_handle_mapped(sk, false);
258                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
259 #ifdef CONFIG_TCP_MD5SIG
260                         tp->af_specific = &tcp_sock_ipv6_specific;
261 #endif
262                         goto failure;
263                 }
264                 np->saddr = sk->sk_v6_rcv_saddr;
265
266                 return err;
267         }
268
269         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
270                 saddr = &sk->sk_v6_rcv_saddr;
271
272         fl6.flowi6_proto = IPPROTO_TCP;
273         fl6.daddr = sk->sk_v6_daddr;
274         fl6.saddr = saddr ? *saddr : np->saddr;
275         fl6.flowi6_oif = sk->sk_bound_dev_if;
276         fl6.flowi6_mark = sk->sk_mark;
277         fl6.fl6_dport = usin->sin6_port;
278         fl6.fl6_sport = inet->inet_sport;
279         fl6.flowi6_uid = sk->sk_uid;
280
281         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
282         final_p = fl6_update_dst(&fl6, opt, &final);
283
284         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
285
286         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
287         if (IS_ERR(dst)) {
288                 err = PTR_ERR(dst);
289                 goto failure;
290         }
291
292         tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
293
294         if (!saddr) {
295                 struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
296                 struct in6_addr prev_v6_rcv_saddr;
297
298                 if (icsk->icsk_bind2_hash) {
299                         prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
300                                                                      sk, net, inet->inet_num);
301                         prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
302                 }
303                 saddr = &fl6.saddr;
304                 sk->sk_v6_rcv_saddr = *saddr;
305
306                 if (prev_addr_hashbucket) {
307                         err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
308                         if (err) {
309                                 sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
310                                 goto failure;
311                         }
312                 }
313         }
314
315         /* set the source address */
316         np->saddr = *saddr;
317         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
318
319         sk->sk_gso_type = SKB_GSO_TCPV6;
320         ip6_dst_store(sk, dst, NULL, NULL);
321
322         icsk->icsk_ext_hdr_len = 0;
323         if (opt)
324                 icsk->icsk_ext_hdr_len = opt->opt_flen +
325                                          opt->opt_nflen;
326
327         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
328
329         inet->inet_dport = usin->sin6_port;
330
331         tcp_set_state(sk, TCP_SYN_SENT);
332         err = inet6_hash_connect(tcp_death_row, sk);
333         if (err)
334                 goto late_failure;
335
336         sk_set_txhash(sk);
337
338         if (likely(!tp->repair)) {
339                 if (!tp->write_seq)
340                         WRITE_ONCE(tp->write_seq,
341                                    secure_tcpv6_seq(np->saddr.s6_addr32,
342                                                     sk->sk_v6_daddr.s6_addr32,
343                                                     inet->inet_sport,
344                                                     inet->inet_dport));
345                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
346                                                    sk->sk_v6_daddr.s6_addr32);
347         }
348
349         if (tcp_fastopen_defer_connect(sk, &err))
350                 return err;
351         if (err)
352                 goto late_failure;
353
354         err = tcp_connect(sk);
355         if (err)
356                 goto late_failure;
357
358         return 0;
359
360 late_failure:
361         tcp_set_state(sk, TCP_CLOSE);
362 failure:
363         inet->inet_dport = 0;
364         sk->sk_route_caps = 0;
365         return err;
366 }
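/* A minimal userspace sketch (illustrative only; the address and port
 * are made up) of the v4-mapped branch above: connecting an AF_INET6
 * socket to ::ffff:a.b.c.d is handed off to tcp_v4_connect(), unless the
 * socket is IPV6_V6ONLY, in which case -ENETUNREACH is returned.
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 dst = {
 *		.sin6_family = AF_INET6,
 *		.sin6_port   = htons(80),
 *	};
 *
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */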
367
368 static void tcp_v6_mtu_reduced(struct sock *sk)
369 {
370         struct dst_entry *dst;
371         u32 mtu;
372
373         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
374                 return;
375
376         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
377
378         /* Drop requests trying to increase our current mss.
379          * The check done in __ip6_rt_update_pmtu() is too late.
380          */
381         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
382                 return;
383
384         dst = inet6_csk_update_pmtu(sk, mtu);
385         if (!dst)
386                 return;
387
388         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
389                 tcp_sync_mss(sk, dst_mtu(dst));
390                 tcp_simple_retransmit(sk);
391         }
392 }
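/* Note: tcp_v6_mtu_reduced() deliberately ignores ICMP messages that
 * would raise the MSS; only a genuinely smaller PMTU leads to
 * tcp_sync_mss() re-clamping the MSS and tcp_simple_retransmit()
 * resending the outstanding data.
 */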
393
394 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
395                 u8 type, u8 code, int offset, __be32 info)
396 {
397         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
398         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
399         struct net *net = dev_net(skb->dev);
400         struct request_sock *fastopen;
401         struct ipv6_pinfo *np;
402         struct tcp_sock *tp;
403         __u32 seq, snd_una;
404         struct sock *sk;
405         bool fatal;
406         int err;
407
408         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
409                                         &hdr->daddr, th->dest,
410                                         &hdr->saddr, ntohs(th->source),
411                                         skb->dev->ifindex, inet6_sdif(skb));
412
413         if (!sk) {
414                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
415                                   ICMP6_MIB_INERRORS);
416                 return -ENOENT;
417         }
418
419         if (sk->sk_state == TCP_TIME_WAIT) {
420                 inet_twsk_put(inet_twsk(sk));
421                 return 0;
422         }
423         seq = ntohl(th->seq);
424         fatal = icmpv6_err_convert(type, code, &err);
425         if (sk->sk_state == TCP_NEW_SYN_RECV) {
426                 tcp_req_err(sk, seq, fatal);
427                 return 0;
428         }
429
430         bh_lock_sock(sk);
431         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
432                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
433
434         if (sk->sk_state == TCP_CLOSE)
435                 goto out;
436
437         if (static_branch_unlikely(&ip6_min_hopcount)) {
438                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
439                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
440                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
441                         goto out;
442                 }
443         }
444
445         tp = tcp_sk(sk);
446         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
447         fastopen = rcu_dereference(tp->fastopen_rsk);
448         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
449         if (sk->sk_state != TCP_LISTEN &&
450             !between(seq, snd_una, tp->snd_nxt)) {
451                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
452                 goto out;
453         }
454
455         np = tcp_inet6_sk(sk);
456
457         if (type == NDISC_REDIRECT) {
458                 if (!sock_owned_by_user(sk)) {
459                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
460
461                         if (dst)
462                                 dst->ops->redirect(dst, sk, skb);
463                 }
464                 goto out;
465         }
466
467         if (type == ICMPV6_PKT_TOOBIG) {
468                 u32 mtu = ntohl(info);
469
470                 /* We are not interested in TCP_LISTEN and open_requests
471                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
472                  * they should go through unfragmented).
473                  */
474                 if (sk->sk_state == TCP_LISTEN)
475                         goto out;
476
477                 if (!ip6_sk_accept_pmtu(sk))
478                         goto out;
479
480                 if (mtu < IPV6_MIN_MTU)
481                         goto out;
482
483                 WRITE_ONCE(tp->mtu_info, mtu);
484
485                 if (!sock_owned_by_user(sk))
486                         tcp_v6_mtu_reduced(sk);
487                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
488                                            &sk->sk_tsq_flags))
489                         sock_hold(sk);
490                 goto out;
491         }
492
493
494         /* Might be for a request_sock */
495         switch (sk->sk_state) {
496         case TCP_SYN_SENT:
497         case TCP_SYN_RECV:
498                 /* Only in fast or simultaneous open. If a fast open socket is
499                  * already accepted, it is treated as a connected one below.
500                  */
501                 if (fastopen && !fastopen->sk)
502                         break;
503
504                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
505
506                 if (!sock_owned_by_user(sk)) {
507                         sk->sk_err = err;
508                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
509
510                         tcp_done(sk);
511                 } else
512                         sk->sk_err_soft = err;
513                 goto out;
514         case TCP_LISTEN:
515                 break;
516         default:
517                 /* Check if this ICMP message allows reverting the RTO
518                  * backoff (see RFC 6069).
519                  */
520                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
521                     code == ICMPV6_NOROUTE)
522                         tcp_ld_RTO_revert(sk, seq);
523         }
524
525         if (!sock_owned_by_user(sk) && np->recverr) {
526                 sk->sk_err = err;
527                 sk_error_report(sk);
528         } else
529                 sk->sk_err_soft = err;
530
531 out:
532         bh_unlock_sock(sk);
533         sock_put(sk);
534         return 0;
535 }
536
537
538 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
539                               struct flowi *fl,
540                               struct request_sock *req,
541                               struct tcp_fastopen_cookie *foc,
542                               enum tcp_synack_type synack_type,
543                               struct sk_buff *syn_skb)
544 {
545         struct inet_request_sock *ireq = inet_rsk(req);
546         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
547         struct ipv6_txoptions *opt;
548         struct flowi6 *fl6 = &fl->u.ip6;
549         struct sk_buff *skb;
550         int err = -ENOMEM;
551         u8 tclass;
552
553         /* First, grab a route. */
554         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
555                                                IPPROTO_TCP)) == NULL)
556                 goto done;
557
558         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
559
560         if (skb) {
561                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
562                                     &ireq->ir_v6_rmt_addr);
563
564                 fl6->daddr = ireq->ir_v6_rmt_addr;
565                 if (np->repflow && ireq->pktopts)
566                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
567
568                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
569                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
570                                 (np->tclass & INET_ECN_MASK) :
571                                 np->tclass;
572
573                 if (!INET_ECN_is_capable(tclass) &&
574                     tcp_bpf_ca_needs_ecn((struct sock *)req))
575                         tclass |= INET_ECN_ECT_0;
576
577                 rcu_read_lock();
578                 opt = ireq->ipv6_opt;
579                 if (!opt)
580                         opt = rcu_dereference(np->opt);
581                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
582                                tclass, sk->sk_priority);
583                 rcu_read_unlock();
584                 err = net_xmit_eval(err);
585         }
586
587 done:
588         return err;
589 }
590
591
592 static void tcp_v6_reqsk_destructor(struct request_sock *req)
593 {
594         kfree(inet_rsk(req)->ipv6_opt);
595         consume_skb(inet_rsk(req)->pktopts);
596 }
597
598 #ifdef CONFIG_TCP_MD5SIG
599 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
600                                                    const struct in6_addr *addr,
601                                                    int l3index)
602 {
603         return tcp_md5_do_lookup(sk, l3index,
604                                  (union tcp_md5_addr *)addr, AF_INET6);
605 }
606
607 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
608                                                 const struct sock *addr_sk)
609 {
610         int l3index;
611
612         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
613                                                  addr_sk->sk_bound_dev_if);
614         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
615                                     l3index);
616 }
617
618 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
619                                  sockptr_t optval, int optlen)
620 {
621         struct tcp_md5sig cmd;
622         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
623         int l3index = 0;
624         u8 prefixlen;
625         u8 flags;
626
627         if (optlen < sizeof(cmd))
628                 return -EINVAL;
629
630         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
631                 return -EFAULT;
632
633         if (sin6->sin6_family != AF_INET6)
634                 return -EINVAL;
635
636         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
637
638         if (optname == TCP_MD5SIG_EXT &&
639             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
640                 prefixlen = cmd.tcpm_prefixlen;
641                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
642                                         prefixlen > 32))
643                         return -EINVAL;
644         } else {
645                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
646         }
647
648         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
649             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
650                 struct net_device *dev;
651
652                 rcu_read_lock();
653                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
654                 if (dev && netif_is_l3_master(dev))
655                         l3index = dev->ifindex;
656                 rcu_read_unlock();
657
658                 /* ok to check set/not set outside of RCU;
659                  * right now the device MUST be an L3 master
660                  */
661                 if (!dev || !l3index)
662                         return -EINVAL;
663         }
664
665         if (!cmd.tcpm_keylen) {
666                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
667                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
668                                               AF_INET, prefixlen,
669                                               l3index, flags);
670                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
671                                       AF_INET6, prefixlen, l3index, flags);
672         }
673
674         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
675                 return -EINVAL;
676
677         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
678                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
679                                       AF_INET, prefixlen, l3index, flags,
680                                       cmd.tcpm_key, cmd.tcpm_keylen,
681                                       GFP_KERNEL);
682
683         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
684                               AF_INET6, prefixlen, l3index, flags,
685                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
686 }
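/* Illustrative userspace sketch of the option parsed above (peer address
 * and key are made up): a plain TCP_MD5SIG call covers a single peer,
 * while TCP_MD5SIG_EXT with tcpm_flags/tcpm_prefixlen/tcpm_ifindex
 * selects the prefix and L3-master variants handled above.
 *
 *	struct tcp_md5sig md5 = { 0 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */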
687
688 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
689                                    const struct in6_addr *daddr,
690                                    const struct in6_addr *saddr,
691                                    const struct tcphdr *th, int nbytes)
692 {
693         struct tcp6_pseudohdr *bp;
694         struct scatterlist sg;
695         struct tcphdr *_th;
696
697         bp = hp->scratch;
698         /* 1. TCP pseudo-header (RFC2460) */
699         bp->saddr = *saddr;
700         bp->daddr = *daddr;
701         bp->protocol = cpu_to_be32(IPPROTO_TCP);
702         bp->len = cpu_to_be32(nbytes);
703
704         _th = (struct tcphdr *)(bp + 1);
705         memcpy(_th, th, sizeof(*th));
706         _th->check = 0;
707
708         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
709         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
710                                 sizeof(*bp) + sizeof(*th));
711         return crypto_ahash_update(hp->md5_req);
712 }
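/* For reference, the pseudo-header hashed above mirrors the RFC 2460
 * upper-layer checksum pseudo-header: the 16-byte source and destination
 * addresses, the 32-bit segment length, and the protocol number,
 * followed here by a copy of the TCP header with its checksum zeroed.
 */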
713
714 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
715                                const struct in6_addr *daddr, struct in6_addr *saddr,
716                                const struct tcphdr *th)
717 {
718         struct tcp_md5sig_pool *hp;
719         struct ahash_request *req;
720
721         hp = tcp_get_md5sig_pool();
722         if (!hp)
723                 goto clear_hash_noput;
724         req = hp->md5_req;
725
726         if (crypto_ahash_init(req))
727                 goto clear_hash;
728         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
729                 goto clear_hash;
730         if (tcp_md5_hash_key(hp, key))
731                 goto clear_hash;
732         ahash_request_set_crypt(req, NULL, md5_hash, 0);
733         if (crypto_ahash_final(req))
734                 goto clear_hash;
735
736         tcp_put_md5sig_pool();
737         return 0;
738
739 clear_hash:
740         tcp_put_md5sig_pool();
741 clear_hash_noput:
742         memset(md5_hash, 0, 16);
743         return 1;
744 }
745
746 static int tcp_v6_md5_hash_skb(char *md5_hash,
747                                const struct tcp_md5sig_key *key,
748                                const struct sock *sk,
749                                const struct sk_buff *skb)
750 {
751         const struct in6_addr *saddr, *daddr;
752         struct tcp_md5sig_pool *hp;
753         struct ahash_request *req;
754         const struct tcphdr *th = tcp_hdr(skb);
755
756         if (sk) { /* valid for establish/request sockets */
757                 saddr = &sk->sk_v6_rcv_saddr;
758                 daddr = &sk->sk_v6_daddr;
759         } else {
760                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
761                 saddr = &ip6h->saddr;
762                 daddr = &ip6h->daddr;
763         }
764
765         hp = tcp_get_md5sig_pool();
766         if (!hp)
767                 goto clear_hash_noput;
768         req = hp->md5_req;
769
770         if (crypto_ahash_init(req))
771                 goto clear_hash;
772
773         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
774                 goto clear_hash;
775         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
776                 goto clear_hash;
777         if (tcp_md5_hash_key(hp, key))
778                 goto clear_hash;
779         ahash_request_set_crypt(req, NULL, md5_hash, 0);
780         if (crypto_ahash_final(req))
781                 goto clear_hash;
782
783         tcp_put_md5sig_pool();
784         return 0;
785
786 clear_hash:
787         tcp_put_md5sig_pool();
788 clear_hash_noput:
789         memset(md5_hash, 0, 16);
790         return 1;
791 }
792
793 #endif
794
795 static void tcp_v6_init_req(struct request_sock *req,
796                             const struct sock *sk_listener,
797                             struct sk_buff *skb)
798 {
799         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
800         struct inet_request_sock *ireq = inet_rsk(req);
801         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
802
803         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
804         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
805
806         /* So that link locals have meaning */
807         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
808             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
809                 ireq->ir_iif = tcp_v6_iif(skb);
810
811         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
812             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
813              np->rxopt.bits.rxinfo ||
814              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
815              np->rxopt.bits.rxohlim || np->repflow)) {
816                 refcount_inc(&skb->users);
817                 ireq->pktopts = skb;
818         }
819 }
820
821 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
822                                           struct sk_buff *skb,
823                                           struct flowi *fl,
824                                           struct request_sock *req)
825 {
826         tcp_v6_init_req(req, sk, skb);
827
828         if (security_inet_conn_request(sk, skb, req))
829                 return NULL;
830
831         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
832 }
833
834 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
835         .family         =       AF_INET6,
836         .obj_size       =       sizeof(struct tcp6_request_sock),
837         .rtx_syn_ack    =       tcp_rtx_synack,
838         .send_ack       =       tcp_v6_reqsk_send_ack,
839         .destructor     =       tcp_v6_reqsk_destructor,
840         .send_reset     =       tcp_v6_send_reset,
841         .syn_ack_timeout =      tcp_syn_ack_timeout,
842 };
843
844 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
845         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
846                                 sizeof(struct ipv6hdr),
847 #ifdef CONFIG_TCP_MD5SIG
848         .req_md5_lookup =       tcp_v6_md5_lookup,
849         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
850 #endif
851 #ifdef CONFIG_SYN_COOKIES
852         .cookie_init_seq =      cookie_v6_init_sequence,
853 #endif
854         .route_req      =       tcp_v6_route_req,
855         .init_seq       =       tcp_v6_init_seq,
856         .init_ts_off    =       tcp_v6_init_ts_off,
857         .send_synack    =       tcp_v6_send_synack,
858 };
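/* The ops tables above are how the protocol-independent request-socket
 * code (tcp_conn_request() and friends) dispatches SYN-ACK transmission,
 * route lookup, ISN generation and MD5 lookup to the IPv6 flavours
 * defined in this file.
 */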
859
860 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
861                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
862                                  int oif, struct tcp_md5sig_key *key, int rst,
863                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
864 {
865         const struct tcphdr *th = tcp_hdr(skb);
866         struct tcphdr *t1;
867         struct sk_buff *buff;
868         struct flowi6 fl6;
869         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
870         struct sock *ctl_sk = net->ipv6.tcp_sk;
871         unsigned int tot_len = sizeof(struct tcphdr);
872         __be32 mrst = 0, *topt;
873         struct dst_entry *dst;
874         __u32 mark = 0;
875
876         if (tsecr)
877                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
878 #ifdef CONFIG_TCP_MD5SIG
879         if (key)
880                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
881 #endif
882
883 #ifdef CONFIG_MPTCP
884         if (rst && !key) {
885                 mrst = mptcp_reset_option(skb);
886
887                 if (mrst)
888                         tot_len += sizeof(__be32);
889         }
890 #endif
891
892         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
893         if (!buff)
894                 return;
895
896         skb_reserve(buff, MAX_TCP_HEADER);
897
898         t1 = skb_push(buff, tot_len);
899         skb_reset_transport_header(buff);
900
901         /* Swap the send and the receive. */
902         memset(t1, 0, sizeof(*t1));
903         t1->dest = th->source;
904         t1->source = th->dest;
905         t1->doff = tot_len / 4;
906         t1->seq = htonl(seq);
907         t1->ack_seq = htonl(ack);
908         t1->ack = !rst || !th->ack;
909         t1->rst = rst;
910         t1->window = htons(win);
911
912         topt = (__be32 *)(t1 + 1);
913
914         if (tsecr) {
915                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
916                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
917                 *topt++ = htonl(tsval);
918                 *topt++ = htonl(tsecr);
919         }
920
921         if (mrst)
922                 *topt++ = mrst;
923
924 #ifdef CONFIG_TCP_MD5SIG
925         if (key) {
926                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
927                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
928                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
929                                     &ipv6_hdr(skb)->saddr,
930                                     &ipv6_hdr(skb)->daddr, t1);
931         }
932 #endif
933
934         memset(&fl6, 0, sizeof(fl6));
935         fl6.daddr = ipv6_hdr(skb)->saddr;
936         fl6.saddr = ipv6_hdr(skb)->daddr;
937         fl6.flowlabel = label;
938
939         buff->ip_summed = CHECKSUM_PARTIAL;
940
941         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
942
943         fl6.flowi6_proto = IPPROTO_TCP;
944         if (rt6_need_strict(&fl6.daddr) && !oif)
945                 fl6.flowi6_oif = tcp_v6_iif(skb);
946         else {
947                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
948                         oif = skb->skb_iif;
949
950                 fl6.flowi6_oif = oif;
951         }
952
953         if (sk) {
954                 if (sk->sk_state == TCP_TIME_WAIT)
955                         mark = inet_twsk(sk)->tw_mark;
956                 else
957                         mark = sk->sk_mark;
958                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
959         }
960         if (txhash) {
961                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
962                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
963         }
964         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
965         fl6.fl6_dport = t1->dest;
966         fl6.fl6_sport = t1->source;
967         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
968         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
969
970         /* Pass a socket to ip6_dst_lookup_flow even when it is for a RST;
971          * the underlying function will use it to retrieve the network
972          * namespace.
973          */
974         if (sk && sk->sk_state != TCP_TIME_WAIT)
975                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referenced */
976         else
977                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
978         if (!IS_ERR(dst)) {
979                 skb_dst_set(buff, dst);
980                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
981                          tclass & ~INET_ECN_MASK, priority);
982                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
983                 if (rst)
984                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
985                 return;
986         }
987
988         kfree_skb(buff);
989 }
990
991 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
992 {
993         const struct tcphdr *th = tcp_hdr(skb);
994         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
995         u32 seq = 0, ack_seq = 0;
996         struct tcp_md5sig_key *key = NULL;
997 #ifdef CONFIG_TCP_MD5SIG
998         const __u8 *hash_location = NULL;
999         unsigned char newhash[16];
1000         int genhash;
1001         struct sock *sk1 = NULL;
1002 #endif
1003         __be32 label = 0;
1004         u32 priority = 0;
1005         struct net *net;
1006         u32 txhash = 0;
1007         int oif = 0;
1008
1009         if (th->rst)
1010                 return;
1011
1012         /* If sk is not NULL, we did a successful lookup and the incoming
1013          * route had to be correct. prequeue might have dropped our dst.
1014          */
1015         if (!sk && !ipv6_unicast_destination(skb))
1016                 return;
1017
1018         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1019 #ifdef CONFIG_TCP_MD5SIG
1020         rcu_read_lock();
1021         hash_location = tcp_parse_md5sig_option(th);
1022         if (sk && sk_fullsock(sk)) {
1023                 int l3index;
1024
1025                 /* sdif set means the packet ingressed via a device
1026                  * in an L3 domain and inet_iif is set to it.
1027                  */
1028                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1029                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1030         } else if (hash_location) {
1031                 int dif = tcp_v6_iif_l3_slave(skb);
1032                 int sdif = tcp_v6_sdif(skb);
1033                 int l3index;
1034
1035                 /*
1036                  * The active side is lost. Try to find the listening socket
1037                  * via the source port, then find the md5 key through it.
1038                  * We do not lose security here: the incoming packet is
1039                  * checked against the md5 hash using the found key, and no
1040                  * RST is generated if the hash doesn't match.
1041                  */
1042                 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1043                                             NULL, 0, &ipv6h->saddr, th->source,
1044                                             &ipv6h->daddr, ntohs(th->source),
1045                                             dif, sdif);
1046                 if (!sk1)
1047                         goto out;
1048
1049                 /* sdif set means the packet ingressed via a device
1050                  * in an L3 domain and dif is set to it.
1051                  */
1052                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1053
1054                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1055                 if (!key)
1056                         goto out;
1057
1058                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1059                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1060                         goto out;
1061         }
1062 #endif
1063
1064         if (th->ack)
1065                 seq = ntohl(th->ack_seq);
1066         else
1067                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1068                           (th->doff << 2);
1069
1070         if (sk) {
1071                 oif = sk->sk_bound_dev_if;
1072                 if (sk_fullsock(sk)) {
1073                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1074
1075                         trace_tcp_send_reset(sk, skb);
1076                         if (np->repflow)
1077                                 label = ip6_flowlabel(ipv6h);
1078                         priority = sk->sk_priority;
1079                         txhash = sk->sk_hash;
1080                 }
1081                 if (sk->sk_state == TCP_TIME_WAIT) {
1082                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1083                         priority = inet_twsk(sk)->tw_priority;
1084                         txhash = inet_twsk(sk)->tw_txhash;
1085                 }
1086         } else {
1087                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1088                         label = ip6_flowlabel(ipv6h);
1089         }
1090
1091         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1092                              ipv6_get_dsfield(ipv6h), label, priority, txhash);
1093
1094 #ifdef CONFIG_TCP_MD5SIG
1095 out:
1096         rcu_read_unlock();
1097 #endif
1098 }
1099
1100 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1101                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1102                             struct tcp_md5sig_key *key, u8 tclass,
1103                             __be32 label, u32 priority, u32 txhash)
1104 {
1105         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1106                              tclass, label, priority, txhash);
1107 }
1108
1109 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1110 {
1111         struct inet_timewait_sock *tw = inet_twsk(sk);
1112         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1113
1114         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1115                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1116                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1117                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1118                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1119                         tw->tw_txhash);
1120
1121         inet_twsk_put(tw);
1122 }
1123
1124 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1125                                   struct request_sock *req)
1126 {
1127         int l3index;
1128
1129         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1130
1131         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1132          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1133          */
1134         /* RFC 7323 2.3
1135          * The window field (SEG.WND) of every outgoing segment, with the
1136          * exception of <SYN> segments, MUST be right-shifted by
1137          * Rcv.Wind.Shift bits:
1138          */
1139         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1140                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1141                         tcp_rsk(req)->rcv_nxt,
1142                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1143                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1144                         req->ts_recent, sk->sk_bound_dev_if,
1145                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1146                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
1147                         tcp_rsk(req)->txhash);
1148 }
1149
1150
1151 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1152 {
1153 #ifdef CONFIG_SYN_COOKIES
1154         const struct tcphdr *th = tcp_hdr(skb);
1155
1156         if (!th->syn)
1157                 sk = cookie_v6_check(sk, skb);
1158 #endif
1159         return sk;
1160 }
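/* Note: syncookies are validated on the returning ACK (no SYN bit set),
 * hence the !th->syn check above; the original SYN only caused a cookie
 * to be sent and created no local state.
 */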
1161
1162 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1163                          struct tcphdr *th, u32 *cookie)
1164 {
1165         u16 mss = 0;
1166 #ifdef CONFIG_SYN_COOKIES
1167         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1168                                     &tcp_request_sock_ipv6_ops, sk, th);
1169         if (mss) {
1170                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1171                 tcp_synq_overflow(sk);
1172         }
1173 #endif
1174         return mss;
1175 }
1176
1177 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1178 {
1179         if (skb->protocol == htons(ETH_P_IP))
1180                 return tcp_v4_conn_request(sk, skb);
1181
1182         if (!ipv6_unicast_destination(skb))
1183                 goto drop;
1184
1185         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1186                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1187                 return 0;
1188         }
1189
1190         return tcp_conn_request(&tcp6_request_sock_ops,
1191                                 &tcp_request_sock_ipv6_ops, sk, skb);
1192
1193 drop:
1194         tcp_listendrop(sk);
1195         return 0; /* don't send reset */
1196 }
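/* Note: a SYN whose IPv6 source is a v4-mapped address is dropped above
 * (counted as IPSTATS_MIB_INHDRERRORS), since such addresses should not
 * appear on the wire; genuine IPv4 SYNs arrive with skb->protocol set to
 * ETH_P_IP and are diverted to tcp_v4_conn_request() instead.
 */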
1197
1198 static void tcp_v6_restore_cb(struct sk_buff *skb)
1199 {
1200         /* We need to move header back to the beginning if xfrm6_policy_check()
1201          * and tcp_v6_fill_cb() are going to be called again.
1202          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1203          */
1204         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1205                 sizeof(struct inet6_skb_parm));
1206 }
1207
1208 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1209                                          struct request_sock *req,
1210                                          struct dst_entry *dst,
1211                                          struct request_sock *req_unhash,
1212                                          bool *own_req)
1213 {
1214         struct inet_request_sock *ireq;
1215         struct ipv6_pinfo *newnp;
1216         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1217         struct ipv6_txoptions *opt;
1218         struct inet_sock *newinet;
1219         bool found_dup_sk = false;
1220         struct tcp_sock *newtp;
1221         struct sock *newsk;
1222 #ifdef CONFIG_TCP_MD5SIG
1223         struct tcp_md5sig_key *key;
1224         int l3index;
1225 #endif
1226         struct flowi6 fl6;
1227
1228         if (skb->protocol == htons(ETH_P_IP)) {
1229                 /*
1230                  *      v6 mapped
1231                  */
1232
1233                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1234                                              req_unhash, own_req);
1235
1236                 if (!newsk)
1237                         return NULL;
1238
1239                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1240
1241                 newnp = tcp_inet6_sk(newsk);
1242                 newtp = tcp_sk(newsk);
1243
1244                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1245
1246                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1247
1248                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1249                 if (sk_is_mptcp(newsk))
1250                         mptcpv6_handle_mapped(newsk, true);
1251                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1252 #ifdef CONFIG_TCP_MD5SIG
1253                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1254 #endif
1255
1256                 newnp->ipv6_mc_list = NULL;
1257                 newnp->ipv6_ac_list = NULL;
1258                 newnp->ipv6_fl_list = NULL;
1259                 newnp->pktoptions  = NULL;
1260                 newnp->opt         = NULL;
1261                 newnp->mcast_oif   = inet_iif(skb);
1262                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1263                 newnp->rcv_flowinfo = 0;
1264                 if (np->repflow)
1265                         newnp->flow_label = 0;
1266
1267                 /*
1268                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1269                  * here, tcp_create_openreq_child now does this for us, see the comment in
1270                  * that function for the gory details. -acme
1271                  */
1272
1273                 /* It is a tricky place. Until this moment IPv4 tcp
1274                    worked with the IPv6 icsk.icsk_af_ops.
1275                    Sync it now.
1276                  */
1277                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1278
1279                 return newsk;
1280         }
1281
1282         ireq = inet_rsk(req);
1283
1284         if (sk_acceptq_is_full(sk))
1285                 goto out_overflow;
1286
1287         if (!dst) {
1288                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1289                 if (!dst)
1290                         goto out;
1291         }
1292
1293         newsk = tcp_create_openreq_child(sk, req, skb);
1294         if (!newsk)
1295                 goto out_nonewsk;
1296
1297         /*
1298          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1299          * count here, tcp_create_openreq_child now does this for us, see the
1300          * comment in that function for the gory details. -acme
1301          */
1302
1303         newsk->sk_gso_type = SKB_GSO_TCPV6;
1304         ip6_dst_store(newsk, dst, NULL, NULL);
1305         inet6_sk_rx_dst_set(newsk, skb);
1306
1307         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1308
1309         newtp = tcp_sk(newsk);
1310         newinet = inet_sk(newsk);
1311         newnp = tcp_inet6_sk(newsk);
1312
1313         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1314
1315         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1316         newnp->saddr = ireq->ir_v6_loc_addr;
1317         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1318         newsk->sk_bound_dev_if = ireq->ir_iif;
1319
1320         /* Now IPv6 options...
1321
1322            First: no IPv4 options.
1323          */
1324         newinet->inet_opt = NULL;
1325         newnp->ipv6_mc_list = NULL;
1326         newnp->ipv6_ac_list = NULL;
1327         newnp->ipv6_fl_list = NULL;
1328
1329         /* Clone RX bits */
1330         newnp->rxopt.all = np->rxopt.all;
1331
1332         newnp->pktoptions = NULL;
1333         newnp->opt        = NULL;
1334         newnp->mcast_oif  = tcp_v6_iif(skb);
1335         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1336         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1337         if (np->repflow)
1338                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1339
1340         /* Set ToS of the new socket based upon the value of the incoming SYN.
1341          * ECT bits are set later in tcp_init_transfer().
1342          */
1343         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1344                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1345
1346         /* Clone native IPv6 options from listening socket (if any)
1347
1348            Yes, keeping a reference count would be much more clever,
1349            but we do one more thing here: reattach optmem
1350            to newsk.
1351          */
1352         opt = ireq->ipv6_opt;
1353         if (!opt)
1354                 opt = rcu_dereference(np->opt);
1355         if (opt) {
1356                 opt = ipv6_dup_options(newsk, opt);
1357                 RCU_INIT_POINTER(newnp->opt, opt);
1358         }
1359         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1360         if (opt)
1361                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1362                                                     opt->opt_flen;
1363
1364         tcp_ca_openreq_child(newsk, dst);
1365
1366         tcp_sync_mss(newsk, dst_mtu(dst));
1367         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1368
1369         tcp_initialize_rcv_mss(newsk);
1370
1371         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1372         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1373
1374 #ifdef CONFIG_TCP_MD5SIG
1375         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1376
1377         /* Copy over the MD5 key from the original socket */
1378         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1379         if (key) {
1380                 /* We're using one, so create a matching key
1381                  * on the newsk structure. If we fail to get
1382                  * memory, then we end up not copying the key
1383                  * across. Shucks.
1384                  */
1385                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1386                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1387                                sk_gfp_mask(sk, GFP_ATOMIC));
1388         }
1389 #endif
1390
1391         if (__inet_inherit_port(sk, newsk) < 0) {
1392                 inet_csk_prepare_forced_close(newsk);
1393                 tcp_done(newsk);
1394                 goto out;
1395         }
1396         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1397                                        &found_dup_sk);
1398         if (*own_req) {
1399                 tcp_move_syn(newtp, req);
1400
1401                 /* Clone pktoptions received with SYN, if we own the req */
1402                 if (ireq->pktopts) {
1403                         newnp->pktoptions = skb_clone(ireq->pktopts,
1404                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1405                         consume_skb(ireq->pktopts);
1406                         ireq->pktopts = NULL;
1407                         if (newnp->pktoptions) {
1408                                 tcp_v6_restore_cb(newnp->pktoptions);
1409                                 skb_set_owner_r(newnp->pktoptions, newsk);
1410                         }
1411                 }
1412         } else {
1413                 if (!req_unhash && found_dup_sk) {
1414                         /* This code path should only be executed in the
1415                          * syncookie case
1416                          */
1417                         bh_unlock_sock(newsk);
1418                         sock_put(newsk);
1419                         newsk = NULL;
1420                 }
1421         }
1422
1423         return newsk;
1424
1425 out_overflow:
1426         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1427 out_nonewsk:
1428         dst_release(dst);
1429 out:
1430         tcp_listendrop(sk);
1431         return NULL;
1432 }
1433
1434 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1435                                                            u32));
1436 /* The socket must have its spinlock held when we get
1437  * here, unless it is a TCP_LISTEN socket.
1438  *
1439  * We have a potential double-lock case here, so even when
1440  * doing backlog processing we use the BH locking scheme.
1441  * This is because we cannot sleep with the original spinlock
1442  * held.
1443  */
1444 INDIRECT_CALLABLE_SCOPE
1445 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1446 {
1447         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1448         struct sk_buff *opt_skb = NULL;
1449         enum skb_drop_reason reason;
1450         struct tcp_sock *tp;
1451
1452         /* Imagine: socket is IPv6. An IPv4 packet arrives, goes
1453            to the IPv4 receive handler and is backlogged.
1454            From the backlog it always goes here. Kerboom...
1455            Fortunately, tcp_rcv_established and rcv_established
1456            handle them correctly, but it is not the case with
1457            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1458          */
1459
1460         if (skb->protocol == htons(ETH_P_IP))
1461                 return tcp_v4_do_rcv(sk, skb);
1462
1463         /*
1464          *      socket locking is here for SMP purposes as backlog rcv
1465          *      is currently called with bh processing disabled.
1466          */
1467
1468         /* Do Stevens' IPV6_PKTOPTIONS.
1469
1470            Yes, guys, it is the only place in our code where we
1471            can implement it without affecting IPv4.
1472            The rest of the code is protocol independent,
1473            and I do not like the idea of uglifying IPv4.
1474
1475            Actually, the whole idea behind IPV6_PKTOPTIONS
1476            looks not very well thought out. For now we latch the
1477            options received in the last packet enqueued by tcp.
1478            Feel free to propose a better solution.
1479                                                --ANK (980728)
1480          */
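        /* Userspace view, as a rough sketch (not from this file; option
         * names per the RFC 2292/3542 APIs):
         *
         *      int on = 1;
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *      ...
         *      getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cmsgbuf, &len);
         *
         * the getsockopt() returns the latched options as ancillary data.
         */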
1481         if (np->rxopt.all)
1482                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1483
1484         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1485         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1486                 struct dst_entry *dst;
1487
1488                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1489                                                 lockdep_sock_is_held(sk));
1490
1491                 sock_rps_save_rxhash(sk, skb);
1492                 sk_mark_napi_id(sk, skb);
1493                 if (dst) {
1494                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1495                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1496                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1497                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1498                                 dst_release(dst);
1499                         }
1500                 }
1501
1502                 tcp_rcv_established(sk, skb);
1503                 if (opt_skb)
1504                         goto ipv6_pktoptions;
1505                 return 0;
1506         }
1507
1508         if (tcp_checksum_complete(skb))
1509                 goto csum_err;
1510
1511         if (sk->sk_state == TCP_LISTEN) {
1512                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1513
1514                 if (!nsk)
1515                         goto discard;
1516
1517                 if (nsk != sk) {
1518                         if (tcp_child_process(sk, nsk, skb))
1519                                 goto reset;
1520                         if (opt_skb)
1521                                 __kfree_skb(opt_skb);
1522                         return 0;
1523                 }
1524         } else
1525                 sock_rps_save_rxhash(sk, skb);
1526
1527         if (tcp_rcv_state_process(sk, skb))
1528                 goto reset;
1529         if (opt_skb)
1530                 goto ipv6_pktoptions;
1531         return 0;
1532
1533 reset:
1534         tcp_v6_send_reset(sk, skb);
1535 discard:
1536         if (opt_skb)
1537                 __kfree_skb(opt_skb);
1538         kfree_skb_reason(skb, reason);
1539         return 0;
1540 csum_err:
1541         reason = SKB_DROP_REASON_TCP_CSUM;
1542         trace_tcp_bad_csum(skb);
1543         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1544         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1545         goto discard;
1546
1547
1548 ipv6_pktoptions:
1549         /* You may ask: what is this? It handles the case where:
1550
1551            1. skb was enqueued by tcp.
1552            2. skb was added to the tail of the read queue, not out of order.
1553            3. the socket is not in a passive state.
1554            4. finally, it really contains options that the user wants to receive.
1555          */
1556         tp = tcp_sk(sk);
1557         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1558             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1559                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1560                         np->mcast_oif = tcp_v6_iif(opt_skb);
1561                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1562                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1563                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1564                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1565                 if (np->repflow)
1566                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
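                /* Latch the options: if accepted, xchg() atomically
                 * installs the new skb in np->pktoptions and hands back
                 * the previous one; either way the old skb is released
                 * via consume_skb() below.
                 */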
1567                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1568                         skb_set_owner_r(opt_skb, sk);
1569                         tcp_v6_restore_cb(opt_skb);
1570                         opt_skb = xchg(&np->pktoptions, opt_skb);
1571                 } else {
1572                         __kfree_skb(opt_skb);
1573                         opt_skb = xchg(&np->pktoptions, NULL);
1574                 }
1575         }
1576
1577         consume_skb(opt_skb);
1578         return 0;
1579 }
1580
1581 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1582                            const struct tcphdr *th)
1583 {
1584         /* This is tricky: we move IP6CB at its correct location into
1585          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1586          * _decode_session6() uses IP6CB().
1587          * barrier() makes sure compiler won't play aliasing games.
1588          */
1589         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1590                 sizeof(struct inet6_skb_parm));
1591         barrier();
1592
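        /* SYN and FIN each consume one sequence number, so th->syn and
         * th->fin are counted into end_seq along with the payload length.
         */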
1593         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1594         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1595                                     skb->len - th->doff*4);
1596         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1597         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1598         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1599         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1600         TCP_SKB_CB(skb)->sacked = 0;
1601         TCP_SKB_CB(skb)->has_rxtstamp =
1602                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1603 }
1604
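/* Main IPv6 receive handler for TCP (registered as the IPPROTO_TCP
 * handler via tcpv6_protocol below): validate the header and checksum,
 * look up the socket, apply MD5/XFRM/filter checks, then process the
 * segment directly or queue it on the backlog if the owner holds the
 * socket lock.
 */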
1605 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1606 {
1607         enum skb_drop_reason drop_reason;
1608         int sdif = inet6_sdif(skb);
1609         int dif = inet6_iif(skb);
1610         const struct tcphdr *th;
1611         const struct ipv6hdr *hdr;
1612         bool refcounted;
1613         struct sock *sk;
1614         int ret;
1615         struct net *net = dev_net(skb->dev);
1616
1617         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1618         if (skb->pkt_type != PACKET_HOST)
1619                 goto discard_it;
1620
1621         /*
1622          *      Count it even if it's bad.
1623          */
1624         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1625
1626         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1627                 goto discard_it;
1628
1629         th = (const struct tcphdr *)skb->data;
1630
1631         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1632                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1633                 goto bad_packet;
1634         }
1635         if (!pskb_may_pull(skb, th->doff*4))
1636                 goto discard_it;
1637
1638         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1639                 goto csum_error;
1640
1641         th = (const struct tcphdr *)skb->data;
1642         hdr = ipv6_hdr(skb);
1643
1644 lookup:
1645         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1646                                 th->source, th->dest, inet6_iif(skb), sdif,
1647                                 &refcounted);
1648         if (!sk)
1649                 goto no_tcp_socket;
1650
1651 process:
1652         if (sk->sk_state == TCP_TIME_WAIT)
1653                 goto do_time_wait;
1654
1655         if (sk->sk_state == TCP_NEW_SYN_RECV) {
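                /* The segment matched a request sock (pending connection):
                 * validate it, then let tcp_check_req() promote the request
                 * into a full child socket or drop it.
                 */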
1656                 struct request_sock *req = inet_reqsk(sk);
1657                 bool req_stolen = false;
1658                 struct sock *nsk;
1659
1660                 sk = req->rsk_listener;
1661                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1662                                                    &hdr->saddr, &hdr->daddr,
1663                                                    AF_INET6, dif, sdif);
1664                 if (drop_reason) {
1665                         sk_drops_add(sk, skb);
1666                         reqsk_put(req);
1667                         goto discard_it;
1668                 }
1669                 if (tcp_checksum_complete(skb)) {
1670                         reqsk_put(req);
1671                         goto csum_error;
1672                 }
1673                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1674                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1675                         if (!nsk) {
1676                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1677                                 goto lookup;
1678                         }
1679                         sk = nsk;
1680                         /* reuseport_migrate_sock() already holds one
1681                          * sk_refcnt on the returned socket.
1682                          */
1683                 } else {
1684                         sock_hold(sk);
1685                 }
1686                 refcounted = true;
1687                 nsk = NULL;
1688                 if (!tcp_filter(sk, skb)) {
1689                         th = (const struct tcphdr *)skb->data;
1690                         hdr = ipv6_hdr(skb);
1691                         tcp_v6_fill_cb(skb, hdr, th);
1692                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1693                 } else {
1694                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1695                 }
1696                 if (!nsk) {
1697                         reqsk_put(req);
1698                         if (req_stolen) {
1699                                 /* Another CPU got exclusive access to req
1700                                  * and created a full-blown socket.
1701                                  * Try to feed this packet to that socket
1702                                  * instead of discarding it.
1703                                  */
1704                                 tcp_v6_restore_cb(skb);
1705                                 sock_put(sk);
1706                                 goto lookup;
1707                         }
1708                         goto discard_and_relse;
1709                 }
1710                 if (nsk == sk) {
1711                         reqsk_put(req);
1712                         tcp_v6_restore_cb(skb);
1713                 } else if (tcp_child_process(sk, nsk, skb)) {
1714                         tcp_v6_send_reset(nsk, skb);
1715                         goto discard_and_relse;
1716                 } else {
1717                         sock_put(sk);
1718                         return 0;
1719                 }
1720         }
1721
1722         if (static_branch_unlikely(&ip6_min_hopcount)) {
1723                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1724                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1725                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1726                         goto discard_and_relse;
1727                 }
1728         }
1729
1730         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1731                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1732                 goto discard_and_relse;
1733         }
1734
1735         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1736                                            AF_INET6, dif, sdif);
1737         if (drop_reason)
1738                 goto discard_and_relse;
1739
1740         if (tcp_filter(sk, skb)) {
1741                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1742                 goto discard_and_relse;
1743         }
1744         th = (const struct tcphdr *)skb->data;
1745         hdr = ipv6_hdr(skb);
1746         tcp_v6_fill_cb(skb, hdr, th);
1747
1748         skb->dev = NULL;
1749
1750         if (sk->sk_state == TCP_LISTEN) {
1751                 ret = tcp_v6_do_rcv(sk, skb);
1752                 goto put_and_return;
1753         }
1754
1755         sk_incoming_cpu_update(sk);
1756
1757         bh_lock_sock_nested(sk);
1758         tcp_segs_in(tcp_sk(sk), skb);
1759         ret = 0;
1760         if (!sock_owned_by_user(sk)) {
1761                 ret = tcp_v6_do_rcv(sk, skb);
1762         } else {
1763                 if (tcp_add_backlog(sk, skb, &drop_reason))
1764                         goto discard_and_relse;
1765         }
1766         bh_unlock_sock(sk);
1767 put_and_return:
1768         if (refcounted)
1769                 sock_put(sk);
1770         return ret ? -1 : 0;
1771
1772 no_tcp_socket:
1773         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1774         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1775                 goto discard_it;
1776
1777         tcp_v6_fill_cb(skb, hdr, th);
1778
1779         if (tcp_checksum_complete(skb)) {
1780 csum_error:
1781                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1782                 trace_tcp_bad_csum(skb);
1783                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1784 bad_packet:
1785                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1786         } else {
1787                 tcp_v6_send_reset(NULL, skb);
1788         }
1789
1790 discard_it:
1791         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1792         kfree_skb_reason(skb, drop_reason);
1793         return 0;
1794
1795 discard_and_relse:
1796         sk_drops_add(sk, skb);
1797         if (refcounted)
1798                 sock_put(sk);
1799         goto discard_it;
1800
1801 do_time_wait:
1802         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1803                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1804                 inet_twsk_put(inet_twsk(sk));
1805                 goto discard_it;
1806         }
1807
1808         tcp_v6_fill_cb(skb, hdr, th);
1809
1810         if (tcp_checksum_complete(skb)) {
1811                 inet_twsk_put(inet_twsk(sk));
1812                 goto csum_error;
1813         }
1814
1815         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1816         case TCP_TW_SYN:
1817         {
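                /* A new SYN arrived on a TIME_WAIT socket. If a listener
                 * will take it, kill the timewait sock and process the SYN
                 * against that listener instead.
                 */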
1818                 struct sock *sk2;
1819
1820                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1821                                             skb, __tcp_hdrlen(th),
1822                                             &ipv6_hdr(skb)->saddr, th->source,
1823                                             &ipv6_hdr(skb)->daddr,
1824                                             ntohs(th->dest),
1825                                             tcp_v6_iif_l3_slave(skb),
1826                                             sdif);
1827                 if (sk2) {
1828                         struct inet_timewait_sock *tw = inet_twsk(sk);
1829                         inet_twsk_deschedule_put(tw);
1830                         sk = sk2;
1831                         tcp_v6_restore_cb(skb);
1832                         refcounted = false;
1833                         goto process;
1834                 }
1835         }
1836                 /* to ACK */
1837                 fallthrough;
1838         case TCP_TW_ACK:
1839                 tcp_v6_timewait_ack(sk, skb);
1840                 break;
1841         case TCP_TW_RST:
1842                 tcp_v6_send_reset(sk, skb);
1843                 inet_twsk_deschedule_put(inet_twsk(sk));
1844                 goto discard_it;
1845         case TCP_TW_SUCCESS:
1846                 ;
1847         }
1848         goto discard_it;
1849 }
1850
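/* Early demux, run from the IPv6 input path before routing: if the
 * segment belongs to an established socket, attach that socket (and its
 * still-valid cached rx dst, if any) to the skb so a route lookup can
 * be skipped.
 */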
1851 void tcp_v6_early_demux(struct sk_buff *skb)
1852 {
1853         struct net *net = dev_net(skb->dev);
1854         const struct ipv6hdr *hdr;
1855         const struct tcphdr *th;
1856         struct sock *sk;
1857
1858         if (skb->pkt_type != PACKET_HOST)
1859                 return;
1860
1861         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1862                 return;
1863
1864         hdr = ipv6_hdr(skb);
1865         th = tcp_hdr(skb);
1866
1867         if (th->doff < sizeof(struct tcphdr) / 4)
1868                 return;
1869
1870         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1871         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1872                                         &hdr->saddr, th->source,
1873                                         &hdr->daddr, ntohs(th->dest),
1874                                         inet6_iif(skb), inet6_sdif(skb));
1875         if (sk) {
1876                 skb->sk = sk;
1877                 skb->destructor = sock_edemux;
1878                 if (sk_fullsock(sk)) {
1879                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1880
1881                         if (dst)
1882                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1883                         if (dst &&
1884                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1885                                 skb_dst_set_noref(skb, dst);
1886                 }
1887         }
1888 }
1889
1890 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1891         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1892         .twsk_unique    = tcp_twsk_unique,
1893         .twsk_destructor = tcp_twsk_destructor,
1894 };
1895
1896 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1897 {
1898         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1899 }
1900
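/*
 *      TCP over native IPv6
 */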
1901 const struct inet_connection_sock_af_ops ipv6_specific = {
1902         .queue_xmit        = inet6_csk_xmit,
1903         .send_check        = tcp_v6_send_check,
1904         .rebuild_header    = inet6_sk_rebuild_header,
1905         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1906         .conn_request      = tcp_v6_conn_request,
1907         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1908         .net_header_len    = sizeof(struct ipv6hdr),
1909         .net_frag_header_len = sizeof(struct frag_hdr),
1910         .setsockopt        = ipv6_setsockopt,
1911         .getsockopt        = ipv6_getsockopt,
1912         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1913         .sockaddr_len      = sizeof(struct sockaddr_in6),
1914         .mtu_reduced       = tcp_v6_mtu_reduced,
1915 };
1916
1917 #ifdef CONFIG_TCP_MD5SIG
1918 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1919         .md5_lookup     =       tcp_v6_md5_lookup,
1920         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1921         .md5_parse      =       tcp_v6_parse_md5_keys,
1922 };
1923 #endif
1924
1925 /*
1926  *      TCP over IPv4 via INET6 API
1927  */
1928 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1929         .queue_xmit        = ip_queue_xmit,
1930         .send_check        = tcp_v4_send_check,
1931         .rebuild_header    = inet_sk_rebuild_header,
1932         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1933         .conn_request      = tcp_v6_conn_request,
1934         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1935         .net_header_len    = sizeof(struct iphdr),
1936         .setsockopt        = ipv6_setsockopt,
1937         .getsockopt        = ipv6_getsockopt,
1938         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1939         .sockaddr_len      = sizeof(struct sockaddr_in6),
1940         .mtu_reduced       = tcp_v4_mtu_reduced,
1941 };
1942
1943 #ifdef CONFIG_TCP_MD5SIG
1944 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1945         .md5_lookup     =       tcp_v4_md5_lookup,
1946         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1947         .md5_parse      =       tcp_v6_parse_md5_keys,
1948 };
1949 #endif
1950
1951 /* NOTE: A lot of things are set to zero explicitly by the call to
1952  *       sk_alloc(), so they need not be done here.
1953  */
1954 static int tcp_v6_init_sock(struct sock *sk)
1955 {
1956         struct inet_connection_sock *icsk = inet_csk(sk);
1957
1958         tcp_init_sock(sk);
1959
1960         icsk->icsk_af_ops = &ipv6_specific;
1961
1962 #ifdef CONFIG_TCP_MD5SIG
1963         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1964 #endif
1965
1966         return 0;
1967 }
1968
1969 static void tcp_v6_destroy_sock(struct sock *sk)
1970 {
1971         tcp_v4_destroy_sock(sk);
1972         inet6_destroy_sock(sk);
1973 }
1974
1975 #ifdef CONFIG_PROC_FS
1976 /* Proc filesystem TCPv6 sock list dumping. */
1977 static void get_openreq6(struct seq_file *seq,
1978                          const struct request_sock *req, int i)
1979 {
1980         long ttd = req->rsk_timer.expires - jiffies;
1981         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1982         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1983
1984         if (ttd < 0)
1985                 ttd = 0;
1986
1987         seq_printf(seq,
1988                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1989                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1990                    i,
1991                    src->s6_addr32[0], src->s6_addr32[1],
1992                    src->s6_addr32[2], src->s6_addr32[3],
1993                    inet_rsk(req)->ir_num,
1994                    dest->s6_addr32[0], dest->s6_addr32[1],
1995                    dest->s6_addr32[2], dest->s6_addr32[3],
1996                    ntohs(inet_rsk(req)->ir_rmt_port),
1997                    TCP_SYN_RECV,
1998                    0, 0, /* could print option size, but that is af-dependent. */
1999                    1,   /* timers active (only the expire timer) */
2000                    jiffies_to_clock_t(ttd),
2001                    req->num_timeout,
2002                    from_kuid_munged(seq_user_ns(seq),
2003                                     sock_i_uid(req->rsk_listener)),
2004                    0,  /* non-standard timer */
2005                    0, /* open_requests have no inode */
2006                    0, req);
2007 }
2008
2009 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2010 {
2011         const struct in6_addr *dest, *src;
2012         __u16 destp, srcp;
2013         int timer_active;
2014         unsigned long timer_expires;
2015         const struct inet_sock *inet = inet_sk(sp);
2016         const struct tcp_sock *tp = tcp_sk(sp);
2017         const struct inet_connection_sock *icsk = inet_csk(sp);
2018         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2019         int rx_queue;
2020         int state;
2021
2022         dest  = &sp->sk_v6_daddr;
2023         src   = &sp->sk_v6_rcv_saddr;
2024         destp = ntohs(inet->inet_dport);
2025         srcp  = ntohs(inet->inet_sport);
2026
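        /* timer_active encodes which timer is pending, for the tr and
         * tm->when columns: 1 retransmit (incl. loss probe and reo
         * timeout), 2 keepalive (sk_timer), 4 zero-window probe, 0 none.
         */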
2027         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2028             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2029             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2030                 timer_active    = 1;
2031                 timer_expires   = icsk->icsk_timeout;
2032         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2033                 timer_active    = 4;
2034                 timer_expires   = icsk->icsk_timeout;
2035         } else if (timer_pending(&sp->sk_timer)) {
2036                 timer_active    = 2;
2037                 timer_expires   = sp->sk_timer.expires;
2038         } else {
2039                 timer_active    = 0;
2040                 timer_expires = jiffies;
2041         }
2042
2043         state = inet_sk_state_load(sp);
2044         if (state == TCP_LISTEN)
2045                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2046         else
2047                 /* Because we don't lock the socket,
2048                  * we might find a transient negative value.
2049                  */
2050                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2051                                       READ_ONCE(tp->copied_seq), 0);
2052
2053         seq_printf(seq,
2054                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2055                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2056                    i,
2057                    src->s6_addr32[0], src->s6_addr32[1],
2058                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2059                    dest->s6_addr32[0], dest->s6_addr32[1],
2060                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2061                    state,
2062                    READ_ONCE(tp->write_seq) - tp->snd_una,
2063                    rx_queue,
2064                    timer_active,
2065                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2066                    icsk->icsk_retransmits,
2067                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2068                    icsk->icsk_probes_out,
2069                    sock_i_ino(sp),
2070                    refcount_read(&sp->sk_refcnt), sp,
2071                    jiffies_to_clock_t(icsk->icsk_rto),
2072                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2073                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2074                    tcp_snd_cwnd(tp),
2075                    state == TCP_LISTEN ?
2076                         fastopenq->max_qlen :
2077                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2078                    );
2079 }
2080
2081 static void get_timewait6_sock(struct seq_file *seq,
2082                                struct inet_timewait_sock *tw, int i)
2083 {
2084         long delta = tw->tw_timer.expires - jiffies;
2085         const struct in6_addr *dest, *src;
2086         __u16 destp, srcp;
2087
2088         dest = &tw->tw_v6_daddr;
2089         src  = &tw->tw_v6_rcv_saddr;
2090         destp = ntohs(tw->tw_dport);
2091         srcp  = ntohs(tw->tw_sport);
2092
2093         seq_printf(seq,
2094                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2095                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2096                    i,
2097                    src->s6_addr32[0], src->s6_addr32[1],
2098                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2099                    dest->s6_addr32[0], dest->s6_addr32[1],
2100                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2101                    tw->tw_substate, 0, 0,
2102                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2103                    refcount_read(&tw->tw_refcnt), tw);
2104 }
2105
2106 static int tcp6_seq_show(struct seq_file *seq, void *v)
2107 {
2108         struct tcp_iter_state *st;
2109         struct sock *sk = v;
2110
2111         if (v == SEQ_START_TOKEN) {
2112                 seq_puts(seq,
2113                          "  sl  "
2114                          "local_address                         "
2115                          "remote_address                        "
2116                          "st tx_queue rx_queue tr tm->when retrnsmt"
2117                          "   uid  timeout inode\n");
2118                 goto out;
2119         }
2120         st = seq->private;
2121
2122         if (sk->sk_state == TCP_TIME_WAIT)
2123                 get_timewait6_sock(seq, v, st->num);
2124         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2125                 get_openreq6(seq, v, st->num);
2126         else
2127                 get_tcp6_sock(seq, v, st->num);
2128 out:
2129         return 0;
2130 }
2131
2132 static const struct seq_operations tcp6_seq_ops = {
2133         .show           = tcp6_seq_show,
2134         .start          = tcp_seq_start,
2135         .next           = tcp_seq_next,
2136         .stop           = tcp_seq_stop,
2137 };
2138
2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2140         .family         = AF_INET6,
2141 };
2142
2143 int __net_init tcp6_proc_init(struct net *net)
2144 {
2145         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2146                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2147                 return -ENOMEM;
2148         return 0;
2149 }
2150
2151 void tcp6_proc_exit(struct net *net)
2152 {
2153         remove_proc_entry("tcp6", net->proc_net);
2154 }
2155 #endif
2156
2157 struct proto tcpv6_prot = {
2158         .name                   = "TCPv6",
2159         .owner                  = THIS_MODULE,
2160         .close                  = tcp_close,
2161         .pre_connect            = tcp_v6_pre_connect,
2162         .connect                = tcp_v6_connect,
2163         .disconnect             = tcp_disconnect,
2164         .accept                 = inet_csk_accept,
2165         .ioctl                  = tcp_ioctl,
2166         .init                   = tcp_v6_init_sock,
2167         .destroy                = tcp_v6_destroy_sock,
2168         .shutdown               = tcp_shutdown,
2169         .setsockopt             = tcp_setsockopt,
2170         .getsockopt             = tcp_getsockopt,
2171         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2172         .keepalive              = tcp_set_keepalive,
2173         .recvmsg                = tcp_recvmsg,
2174         .sendmsg                = tcp_sendmsg,
2175         .sendpage               = tcp_sendpage,
2176         .backlog_rcv            = tcp_v6_do_rcv,
2177         .release_cb             = tcp_release_cb,
2178         .hash                   = inet6_hash,
2179         .unhash                 = inet_unhash,
2180         .get_port               = inet_csk_get_port,
2181         .put_port               = inet_put_port,
2182 #ifdef CONFIG_BPF_SYSCALL
2183         .psock_update_sk_prot   = tcp_bpf_update_proto,
2184 #endif
2185         .enter_memory_pressure  = tcp_enter_memory_pressure,
2186         .leave_memory_pressure  = tcp_leave_memory_pressure,
2187         .stream_memory_free     = tcp_stream_memory_free,
2188         .sockets_allocated      = &tcp_sockets_allocated,
2189
2190         .memory_allocated       = &tcp_memory_allocated,
2191         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2192
2193         .memory_pressure        = &tcp_memory_pressure,
2194         .orphan_count           = &tcp_orphan_count,
2195         .sysctl_mem             = sysctl_tcp_mem,
2196         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2197         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2198         .max_header             = MAX_TCP_HEADER,
2199         .obj_size               = sizeof(struct tcp6_sock),
2200         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2201         .twsk_prot              = &tcp6_timewait_sock_ops,
2202         .rsk_prot               = &tcp6_request_sock_ops,
2203         .h.hashinfo             = NULL,
2204         .no_autobind            = true,
2205         .diag_destroy           = tcp_abort,
2206 };
2207 EXPORT_SYMBOL_GPL(tcpv6_prot);
2208
2209 static const struct inet6_protocol tcpv6_protocol = {
2210         .handler        =       tcp_v6_rcv,
2211         .err_handler    =       tcp_v6_err,
2212         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2213 };
2214
2215 static struct inet_protosw tcpv6_protosw = {
2216         .type           =       SOCK_STREAM,
2217         .protocol       =       IPPROTO_TCP,
2218         .prot           =       &tcpv6_prot,
2219         .ops            =       &inet6_stream_ops,
2220         .flags          =       INET_PROTOSW_PERMANENT |
2221                                 INET_PROTOSW_ICSK,
2222 };
2223
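/* Per-netns control socket, used to transmit packets (e.g. RSTs and
 * ACKs) that are not associated with a full socket; see
 * tcp_v6_send_response() earlier in this file.
 */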
2224 static int __net_init tcpv6_net_init(struct net *net)
2225 {
2226         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2227                                     SOCK_RAW, IPPROTO_TCP, net);
2228 }
2229
2230 static void __net_exit tcpv6_net_exit(struct net *net)
2231 {
2232         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2233 }
2234
2235 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2236 {
2237         tcp_twsk_purge(net_exit_list, AF_INET6);
2238 }
2239
2240 static struct pernet_operations tcpv6_net_ops = {
2241         .init       = tcpv6_net_init,
2242         .exit       = tcpv6_net_exit,
2243         .exit_batch = tcpv6_net_exit_batch,
2244 };
2245
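/* Boot-time registration: protocol handler, protosw, pernet ops, then
 * MPTCP. Failures unwind in reverse order through the goto chain below.
 */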
2246 int __init tcpv6_init(void)
2247 {
2248         int ret;
2249
2250         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2251         if (ret)
2252                 goto out;
2253
2254         /* register inet6 protocol */
2255         ret = inet6_register_protosw(&tcpv6_protosw);
2256         if (ret)
2257                 goto out_tcpv6_protocol;
2258
2259         ret = register_pernet_subsys(&tcpv6_net_ops);
2260         if (ret)
2261                 goto out_tcpv6_protosw;
2262
2263         ret = mptcpv6_init();
2264         if (ret)
2265                 goto out_tcpv6_pernet_subsys;
2266
2267 out:
2268         return ret;
2269
2270 out_tcpv6_pernet_subsys:
2271         unregister_pernet_subsys(&tcpv6_net_ops);
2272 out_tcpv6_protosw:
2273         inet6_unregister_protosw(&tcpv6_protosw);
2274 out_tcpv6_protocol:
2275         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2276         goto out;
2277 }
2278
2279 void tcpv6_exit(void)
2280 {
2281         unregister_pernet_subsys(&tcpv6_net_ops);
2282         inet6_unregister_protosw(&tcpv6_protosw);
2283         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2284 }