tcp: Clean up some functions.
net/ipv6/tcp_ipv6.c (from platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
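
/* A minimal compile-time sketch (not part of the original file) of the
 * layout assumption behind tcp_inet6_sk(): struct tcp6_sock must keep its
 * ipv6_pinfo as the very last member.  The member name "inet6" is assumed
 * here, as in mainline:
 */
static inline void tcp_inet6_sk_layout_check(void)
{
	BUILD_BUG_ON(offsetof(struct tcp6_sock, inet6) !=
		     sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo));
}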
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
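
/* secure_tcpv6_seq() yields RFC 6528 style initial sequence numbers: a
 * keyed hash of the {saddr, daddr, sport, dport} tuple mixed with a clock
 * component, so ISNs are hard to predict off-path yet still increase over
 * time.
 */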
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and is intended to
134          * prevent the BPF program called below from accessing bytes that are
135          * out of the bounds specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
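
/* SIN6_LEN_RFC2133 is the 24-byte sockaddr_in6 of RFC 2133, i.e. the
 * structure without the later sin6_scope_id member; it is the shortest
 * address length a caller may legitimately pass here.
 */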
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_connection_sock *icsk = inet_csk(sk);
150         struct in6_addr *saddr = NULL, *final_p, final;
151         struct inet_timewait_death_row *tcp_death_row;
152         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153         struct inet_sock *inet = inet_sk(sk);
154         struct tcp_sock *tp = tcp_sk(sk);
155         struct net *net = sock_net(sk);
156         struct ipv6_txoptions *opt;
157         struct dst_entry *dst;
158         struct flowi6 fl6;
159         int addr_type;
160         int err;
161
162         if (addr_len < SIN6_LEN_RFC2133)
163                 return -EINVAL;
164
165         if (usin->sin6_family != AF_INET6)
166                 return -EAFNOSUPPORT;
167
168         memset(&fl6, 0, sizeof(fl6));
169
170         if (np->sndflow) {
171                 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
172                 IP6_ECN_flow_init(fl6.flowlabel);
173                 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
174                         struct ip6_flowlabel *flowlabel;
175                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
176                         if (IS_ERR(flowlabel))
177                                 return -EINVAL;
178                         fl6_sock_release(flowlabel);
179                 }
180         }
181
182         /*
183          *      connect() to INADDR_ANY means loopback (BSD'ism).
184          */
185
186         if (ipv6_addr_any(&usin->sin6_addr)) {
187                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
188                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189                                                &usin->sin6_addr);
190                 else
191                         usin->sin6_addr = in6addr_loopback;
192         }
193
194         addr_type = ipv6_addr_type(&usin->sin6_addr);
195
196         if (addr_type & IPV6_ADDR_MULTICAST)
197                 return -ENETUNREACH;
198
199         if (addr_type & IPV6_ADDR_LINKLOCAL) {
200                 if (addr_len >= sizeof(struct sockaddr_in6) &&
201                     usin->sin6_scope_id) {
202                         /* If an interface was set while binding, the
203                          * indices must coincide.
204                          */
205                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206                                 return -EINVAL;
207
208                         sk->sk_bound_dev_if = usin->sin6_scope_id;
209                 }
210
211                 /* Connect to link-local address requires an interface */
212                 if (!sk->sk_bound_dev_if)
213                         return -EINVAL;
214         }
215
216         if (tp->rx_opt.ts_recent_stamp &&
217             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
218                 tp->rx_opt.ts_recent = 0;
219                 tp->rx_opt.ts_recent_stamp = 0;
220                 WRITE_ONCE(tp->write_seq, 0);
221         }
222
223         sk->sk_v6_daddr = usin->sin6_addr;
224         np->flow_label = fl6.flowlabel;
225
226         /*
227          *      TCP over IPv4
228          */
229
230         if (addr_type & IPV6_ADDR_MAPPED) {
231                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
232                 struct sockaddr_in sin;
233
234                 if (ipv6_only_sock(sk))
235                         return -ENETUNREACH;
236
237                 sin.sin_family = AF_INET;
238                 sin.sin_port = usin->sin6_port;
239                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240
241                 icsk->icsk_af_ops = &ipv6_mapped;
242                 if (sk_is_mptcp(sk))
243                         mptcpv6_handle_mapped(sk, true);
244                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 #ifdef CONFIG_TCP_MD5SIG
246                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247 #endif
248
249                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250
251                 if (err) {
252                         icsk->icsk_ext_hdr_len = exthdrlen;
253                         icsk->icsk_af_ops = &ipv6_specific;
254                         if (sk_is_mptcp(sk))
255                                 mptcpv6_handle_mapped(sk, false);
256                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
257 #ifdef CONFIG_TCP_MD5SIG
258                         tp->af_specific = &tcp_sock_ipv6_specific;
259 #endif
260                         goto failure;
261                 }
262                 np->saddr = sk->sk_v6_rcv_saddr;
263
264                 return err;
265         }
266
267         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
268                 saddr = &sk->sk_v6_rcv_saddr;
269
270         fl6.flowi6_proto = IPPROTO_TCP;
271         fl6.daddr = sk->sk_v6_daddr;
272         fl6.saddr = saddr ? *saddr : np->saddr;
273         fl6.flowi6_oif = sk->sk_bound_dev_if;
274         fl6.flowi6_mark = sk->sk_mark;
275         fl6.fl6_dport = usin->sin6_port;
276         fl6.fl6_sport = inet->inet_sport;
277         fl6.flowi6_uid = sk->sk_uid;
278
279         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
280         final_p = fl6_update_dst(&fl6, opt, &final);
281
282         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283
284         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
285         if (IS_ERR(dst)) {
286                 err = PTR_ERR(dst);
287                 goto failure;
288         }
289
290         if (!saddr) {
291                 struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
292                 struct in6_addr prev_v6_rcv_saddr;
293
294                 if (icsk->icsk_bind2_hash) {
295                         prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
296                                                                      sk, net, inet->inet_num);
297                         prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
298                 }
299                 saddr = &fl6.saddr;
300                 sk->sk_v6_rcv_saddr = *saddr;
301
302                 if (prev_addr_hashbucket) {
303                         err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
304                         if (err) {
305                                 sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
306                                 goto failure;
307                         }
308                 }
309         }
310
311         /* set the source address */
312         np->saddr = *saddr;
313         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
314
315         sk->sk_gso_type = SKB_GSO_TCPV6;
316         ip6_dst_store(sk, dst, NULL, NULL);
317
318         icsk->icsk_ext_hdr_len = 0;
319         if (opt)
320                 icsk->icsk_ext_hdr_len = opt->opt_flen +
321                                          opt->opt_nflen;
322
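	/* With the minimum IPv6 MTU this clamps the MSS to
	 * 1280 - 40 (struct ipv6hdr) - 20 (struct tcphdr) = 1220 bytes.
	 */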
323         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
324
325         inet->inet_dport = usin->sin6_port;
326
327         tcp_set_state(sk, TCP_SYN_SENT);
328         tcp_death_row = net->ipv4.tcp_death_row;
329         err = inet6_hash_connect(tcp_death_row, sk);
330         if (err)
331                 goto late_failure;
332
333         sk_set_txhash(sk);
334
335         if (likely(!tp->repair)) {
336                 if (!tp->write_seq)
337                         WRITE_ONCE(tp->write_seq,
338                                    secure_tcpv6_seq(np->saddr.s6_addr32,
339                                                     sk->sk_v6_daddr.s6_addr32,
340                                                     inet->inet_sport,
341                                                     inet->inet_dport));
342                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
343                                                    sk->sk_v6_daddr.s6_addr32);
344         }
345
346         if (tcp_fastopen_defer_connect(sk, &err))
347                 return err;
348         if (err)
349                 goto late_failure;
350
351         err = tcp_connect(sk);
352         if (err)
353                 goto late_failure;
354
355         return 0;
356
357 late_failure:
358         tcp_set_state(sk, TCP_CLOSE);
359 failure:
360         inet->inet_dport = 0;
361         sk->sk_route_caps = 0;
362         return err;
363 }
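
/* A hypothetical userspace view of the checks above (illustrative sketch,
 * not part of this file): given an AF_INET6 socket fd, a link-local
 * destination needs sin6_scope_id, and connecting to :: means loopback.
 *
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6 };
 *
 *	a.sin6_port = htons(12345);
 *	inet_pton(AF_INET6, "fe80::1", &a.sin6_addr);
 *	a.sin6_scope_id = if_nametoindex("eth0");
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * Without sin6_scope_id the connect() above fails with EINVAL.
 */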
364
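/* Invoked, possibly deferred from the ICMPv6 error handler, once a Packet
 * Too Big message has lowered the path MTU: resync the MSS to the new route
 * MTU and retransmit, but only when that would actually shrink the MSS.
 */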
365 static void tcp_v6_mtu_reduced(struct sock *sk)
366 {
367         struct dst_entry *dst;
368         u32 mtu;
369
370         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
371                 return;
372
373         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
374
375         /* Drop requests trying to increase our current mss.
376          * The check done in __ip6_rt_update_pmtu() is too late.
377          */
378         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
379                 return;
380
381         dst = inet6_csk_update_pmtu(sk, mtu);
382         if (!dst)
383                 return;
384
385         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
386                 tcp_sync_mss(sk, dst_mtu(dst));
387                 tcp_simple_retransmit(sk);
388         }
389 }
390
391 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
392                 u8 type, u8 code, int offset, __be32 info)
393 {
394         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
395         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
396         struct net *net = dev_net(skb->dev);
397         struct request_sock *fastopen;
398         struct ipv6_pinfo *np;
399         struct tcp_sock *tp;
400         __u32 seq, snd_una;
401         struct sock *sk;
402         bool fatal;
403         int err;
404
405         sk = __inet6_lookup_established(net, &tcp_hashinfo,
406                                         &hdr->daddr, th->dest,
407                                         &hdr->saddr, ntohs(th->source),
408                                         skb->dev->ifindex, inet6_sdif(skb));
409
410         if (!sk) {
411                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
412                                   ICMP6_MIB_INERRORS);
413                 return -ENOENT;
414         }
415
416         if (sk->sk_state == TCP_TIME_WAIT) {
417                 inet_twsk_put(inet_twsk(sk));
418                 return 0;
419         }
420         seq = ntohl(th->seq);
421         fatal = icmpv6_err_convert(type, code, &err);
422         if (sk->sk_state == TCP_NEW_SYN_RECV) {
423                 tcp_req_err(sk, seq, fatal);
424                 return 0;
425         }
426
427         bh_lock_sock(sk);
428         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
429                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
430
431         if (sk->sk_state == TCP_CLOSE)
432                 goto out;
433
434         if (static_branch_unlikely(&ip6_min_hopcount)) {
435                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
436                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
437                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
438                         goto out;
439                 }
440         }
441
442         tp = tcp_sk(sk);
443         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
444         fastopen = rcu_dereference(tp->fastopen_rsk);
445         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
446         if (sk->sk_state != TCP_LISTEN &&
447             !between(seq, snd_una, tp->snd_nxt)) {
448                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
449                 goto out;
450         }
451
452         np = tcp_inet6_sk(sk);
453
454         if (type == NDISC_REDIRECT) {
455                 if (!sock_owned_by_user(sk)) {
456                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
457
458                         if (dst)
459                                 dst->ops->redirect(dst, sk, skb);
460                 }
461                 goto out;
462         }
463
464         if (type == ICMPV6_PKT_TOOBIG) {
465                 u32 mtu = ntohl(info);
466
467                 /* We are not interested in TCP_LISTEN and open_requests
468                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
469                  * they should go through unfragmented).
470                  */
471                 if (sk->sk_state == TCP_LISTEN)
472                         goto out;
473
474                 if (!ip6_sk_accept_pmtu(sk))
475                         goto out;
476
477                 if (mtu < IPV6_MIN_MTU)
478                         goto out;
479
480                 WRITE_ONCE(tp->mtu_info, mtu);
481
482                 if (!sock_owned_by_user(sk))
483                         tcp_v6_mtu_reduced(sk);
484                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
485                                            &sk->sk_tsq_flags))
486                         sock_hold(sk);
487                 goto out;
488         }
489
490
491         /* Might be for a request_sock */
492         switch (sk->sk_state) {
493         case TCP_SYN_SENT:
494         case TCP_SYN_RECV:
495                 /* Only in fast or simultaneous open. If a fast open socket is
496                  * already accepted it is treated as a connected one below.
497                  */
498                 if (fastopen && !fastopen->sk)
499                         break;
500
501                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
502
503                 if (!sock_owned_by_user(sk)) {
504                         sk->sk_err = err;
505                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
506
507                         tcp_done(sk);
508                 } else
509                         sk->sk_err_soft = err;
510                 goto out;
511         case TCP_LISTEN:
512                 break;
513         default:
514                 /* Check whether this ICMP message allows the RTO
515                  * backoff to be reverted (see RFC 6069).
516                  */
517                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
518                     code == ICMPV6_NOROUTE)
519                         tcp_ld_RTO_revert(sk, seq);
520         }
521
522         if (!sock_owned_by_user(sk) && np->recverr) {
523                 sk->sk_err = err;
524                 sk_error_report(sk);
525         } else
526                 sk->sk_err_soft = err;
527
528 out:
529         bh_unlock_sock(sk);
530         sock_put(sk);
531         return 0;
532 }
533
534
535 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
536                               struct flowi *fl,
537                               struct request_sock *req,
538                               struct tcp_fastopen_cookie *foc,
539                               enum tcp_synack_type synack_type,
540                               struct sk_buff *syn_skb)
541 {
542         struct inet_request_sock *ireq = inet_rsk(req);
543         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
544         struct ipv6_txoptions *opt;
545         struct flowi6 *fl6 = &fl->u.ip6;
546         struct sk_buff *skb;
547         int err = -ENOMEM;
548         u8 tclass;
549
550         /* First, grab a route. */
551         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
552                                                IPPROTO_TCP)) == NULL)
553                 goto done;
554
555         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
556
557         if (skb) {
558                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
559                                     &ireq->ir_v6_rmt_addr);
560
561                 fl6->daddr = ireq->ir_v6_rmt_addr;
562                 if (np->repflow && ireq->pktopts)
563                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
564
565                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
566                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
567                                 (np->tclass & INET_ECN_MASK) :
568                                 np->tclass;
569
570                 if (!INET_ECN_is_capable(tclass) &&
571                     tcp_bpf_ca_needs_ecn((struct sock *)req))
572                         tclass |= INET_ECN_ECT_0;
573
574                 rcu_read_lock();
575                 opt = ireq->ipv6_opt;
576                 if (!opt)
577                         opt = rcu_dereference(np->opt);
578                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
579                                tclass, sk->sk_priority);
580                 rcu_read_unlock();
581                 err = net_xmit_eval(err);
582         }
583
584 done:
585         return err;
586 }
587
588
589 static void tcp_v6_reqsk_destructor(struct request_sock *req)
590 {
591         kfree(inet_rsk(req)->ipv6_opt);
592         consume_skb(inet_rsk(req)->pktopts);
593 }
594
595 #ifdef CONFIG_TCP_MD5SIG
596 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
597                                                    const struct in6_addr *addr,
598                                                    int l3index)
599 {
600         return tcp_md5_do_lookup(sk, l3index,
601                                  (union tcp_md5_addr *)addr, AF_INET6);
602 }
603
604 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
605                                                 const struct sock *addr_sk)
606 {
607         int l3index;
608
609         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
610                                                  addr_sk->sk_bound_dev_if);
611         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
612                                     l3index);
613 }
614
615 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
616                                  sockptr_t optval, int optlen)
617 {
618         struct tcp_md5sig cmd;
619         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
620         int l3index = 0;
621         u8 prefixlen;
622         u8 flags;
623
624         if (optlen < sizeof(cmd))
625                 return -EINVAL;
626
627         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
628                 return -EFAULT;
629
630         if (sin6->sin6_family != AF_INET6)
631                 return -EINVAL;
632
633         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
634
635         if (optname == TCP_MD5SIG_EXT &&
636             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
637                 prefixlen = cmd.tcpm_prefixlen;
638                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
639                                         prefixlen > 32))
640                         return -EINVAL;
641         } else {
642                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
643         }
644
645         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
646             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
647                 struct net_device *dev;
648
649                 rcu_read_lock();
650                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
651                 if (dev && netif_is_l3_master(dev))
652                         l3index = dev->ifindex;
653                 rcu_read_unlock();
654
655                 /* It is OK to look at dev/l3index outside of RCU here;
656                  * right now the device MUST be an L3 master.
657                  */
658                 if (!dev || !l3index)
659                         return -EINVAL;
660         }
661
662         if (!cmd.tcpm_keylen) {
663                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
664                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
665                                               AF_INET, prefixlen,
666                                               l3index, flags);
667                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
668                                       AF_INET6, prefixlen, l3index, flags);
669         }
670
671         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
672                 return -EINVAL;
673
674         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
675                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
676                                       AF_INET, prefixlen, l3index, flags,
677                                       cmd.tcpm_key, cmd.tcpm_keylen,
678                                       GFP_KERNEL);
679
680         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
681                               AF_INET6, prefixlen, l3index, flags,
682                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
683 }
684
685 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
686                                    const struct in6_addr *daddr,
687                                    const struct in6_addr *saddr,
688                                    const struct tcphdr *th, int nbytes)
689 {
690         struct tcp6_pseudohdr *bp;
691         struct scatterlist sg;
692         struct tcphdr *_th;
693
694         bp = hp->scratch;
695         /* 1. TCP pseudo-header (RFC 2460) */
696         bp->saddr = *saddr;
697         bp->daddr = *daddr;
698         bp->protocol = cpu_to_be32(IPPROTO_TCP);
699         bp->len = cpu_to_be32(nbytes);
700
701         _th = (struct tcphdr *)(bp + 1);
702         memcpy(_th, th, sizeof(*th));
703         _th->check = 0;
704
705         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
706         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
707                                 sizeof(*bp) + sizeof(*th));
708         return crypto_ahash_update(hp->md5_req);
709 }
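
/* The scratch buffer assembled above is the RFC 2460 pseudo-header followed
 * by a copy of the base TCP header with its checksum zeroed: exactly the
 * prefix RFC 2385 feeds into the MD5 digest before the payload and the key
 * (TCP options are deliberately excluded from the hash).
 */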
710
711 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
712                                const struct in6_addr *daddr, struct in6_addr *saddr,
713                                const struct tcphdr *th)
714 {
715         struct tcp_md5sig_pool *hp;
716         struct ahash_request *req;
717
718         hp = tcp_get_md5sig_pool();
719         if (!hp)
720                 goto clear_hash_noput;
721         req = hp->md5_req;
722
723         if (crypto_ahash_init(req))
724                 goto clear_hash;
725         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
726                 goto clear_hash;
727         if (tcp_md5_hash_key(hp, key))
728                 goto clear_hash;
729         ahash_request_set_crypt(req, NULL, md5_hash, 0);
730         if (crypto_ahash_final(req))
731                 goto clear_hash;
732
733         tcp_put_md5sig_pool();
734         return 0;
735
736 clear_hash:
737         tcp_put_md5sig_pool();
738 clear_hash_noput:
739         memset(md5_hash, 0, 16);
740         return 1;
741 }
742
743 static int tcp_v6_md5_hash_skb(char *md5_hash,
744                                const struct tcp_md5sig_key *key,
745                                const struct sock *sk,
746                                const struct sk_buff *skb)
747 {
748         const struct in6_addr *saddr, *daddr;
749         struct tcp_md5sig_pool *hp;
750         struct ahash_request *req;
751         const struct tcphdr *th = tcp_hdr(skb);
752
753         if (sk) { /* valid for establish/request sockets */
754                 saddr = &sk->sk_v6_rcv_saddr;
755                 daddr = &sk->sk_v6_daddr;
756         } else {
757                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
758                 saddr = &ip6h->saddr;
759                 daddr = &ip6h->daddr;
760         }
761
762         hp = tcp_get_md5sig_pool();
763         if (!hp)
764                 goto clear_hash_noput;
765         req = hp->md5_req;
766
767         if (crypto_ahash_init(req))
768                 goto clear_hash;
769
770         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
771                 goto clear_hash;
772         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
773                 goto clear_hash;
774         if (tcp_md5_hash_key(hp, key))
775                 goto clear_hash;
776         ahash_request_set_crypt(req, NULL, md5_hash, 0);
777         if (crypto_ahash_final(req))
778                 goto clear_hash;
779
780         tcp_put_md5sig_pool();
781         return 0;
782
783 clear_hash:
784         tcp_put_md5sig_pool();
785 clear_hash_noput:
786         memset(md5_hash, 0, 16);
787         return 1;
788 }
789
790 #endif
791
792 static void tcp_v6_init_req(struct request_sock *req,
793                             const struct sock *sk_listener,
794                             struct sk_buff *skb)
795 {
796         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
797         struct inet_request_sock *ireq = inet_rsk(req);
798         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
799
800         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
801         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
802
803         /* So that link locals have meaning */
804         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
805             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
806                 ireq->ir_iif = tcp_v6_iif(skb);
807
808         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
809             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
810              np->rxopt.bits.rxinfo ||
811              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
812              np->rxopt.bits.rxohlim || np->repflow)) {
813                 refcount_inc(&skb->users);
814                 ireq->pktopts = skb;
815         }
816 }
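
/* The SYN's skb is kept alive (via a refcount) in ireq->pktopts whenever the
 * listener has any IPV6_PKTOPTIONS-style receive option enabled, so the
 * ancillary data can later be replayed to the child socket created in
 * tcp_v6_syn_recv_sock().
 */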
817
818 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
819                                           struct sk_buff *skb,
820                                           struct flowi *fl,
821                                           struct request_sock *req)
822 {
823         tcp_v6_init_req(req, sk, skb);
824
825         if (security_inet_conn_request(sk, skb, req))
826                 return NULL;
827
828         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
829 }
830
831 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
832         .family         =       AF_INET6,
833         .obj_size       =       sizeof(struct tcp6_request_sock),
834         .rtx_syn_ack    =       tcp_rtx_synack,
835         .send_ack       =       tcp_v6_reqsk_send_ack,
836         .destructor     =       tcp_v6_reqsk_destructor,
837         .send_reset     =       tcp_v6_send_reset,
838         .syn_ack_timeout =      tcp_syn_ack_timeout,
839 };
840
841 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
842         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
843                                 sizeof(struct ipv6hdr),
844 #ifdef CONFIG_TCP_MD5SIG
845         .req_md5_lookup =       tcp_v6_md5_lookup,
846         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
847 #endif
848 #ifdef CONFIG_SYN_COOKIES
849         .cookie_init_seq =      cookie_v6_init_sequence,
850 #endif
851         .route_req      =       tcp_v6_route_req,
852         .init_seq       =       tcp_v6_init_seq,
853         .init_ts_off    =       tcp_v6_init_ts_off,
854         .send_synack    =       tcp_v6_send_synack,
855 };
856
857 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
858                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
859                                  int oif, struct tcp_md5sig_key *key, int rst,
860                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
861 {
862         const struct tcphdr *th = tcp_hdr(skb);
863         struct tcphdr *t1;
864         struct sk_buff *buff;
865         struct flowi6 fl6;
866         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
867         struct sock *ctl_sk = net->ipv6.tcp_sk;
868         unsigned int tot_len = sizeof(struct tcphdr);
869         __be32 mrst = 0, *topt;
870         struct dst_entry *dst;
871         __u32 mark = 0;
872
873         if (tsecr)
874                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
875 #ifdef CONFIG_TCP_MD5SIG
876         if (key)
877                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
878 #endif
879
880 #ifdef CONFIG_MPTCP
881         if (rst && !key) {
882                 mrst = mptcp_reset_option(skb);
883
884                 if (mrst)
885                         tot_len += sizeof(__be32);
886         }
887 #endif
888
889         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
890         if (!buff)
891                 return;
892
893         skb_reserve(buff, MAX_TCP_HEADER);
894
895         t1 = skb_push(buff, tot_len);
896         skb_reset_transport_header(buff);
897
898         /* Swap the source and the destination of the incoming segment. */
899         memset(t1, 0, sizeof(*t1));
900         t1->dest = th->source;
901         t1->source = th->dest;
902         t1->doff = tot_len / 4;
903         t1->seq = htonl(seq);
904         t1->ack_seq = htonl(ack);
905         t1->ack = !rst || !th->ack;
906         t1->rst = rst;
907         t1->window = htons(win);
908
909         topt = (__be32 *)(t1 + 1);
910
911         if (tsecr) {
912                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
913                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
914                 *topt++ = htonl(tsval);
915                 *topt++ = htonl(tsecr);
916         }
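	/* The first word above encodes the bytes 01 01 08 0a: two NOPs for
	 * alignment, then kind 8 (TIMESTAMP) with length 10, per RFC 7323.
	 */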
917
918         if (mrst)
919                 *topt++ = mrst;
920
921 #ifdef CONFIG_TCP_MD5SIG
922         if (key) {
923                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
924                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
925                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
926                                     &ipv6_hdr(skb)->saddr,
927                                     &ipv6_hdr(skb)->daddr, t1);
928         }
929 #endif
930
931         memset(&fl6, 0, sizeof(fl6));
932         fl6.daddr = ipv6_hdr(skb)->saddr;
933         fl6.saddr = ipv6_hdr(skb)->daddr;
934         fl6.flowlabel = label;
935
936         buff->ip_summed = CHECKSUM_PARTIAL;
937
938         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
939
940         fl6.flowi6_proto = IPPROTO_TCP;
941         if (rt6_need_strict(&fl6.daddr) && !oif)
942                 fl6.flowi6_oif = tcp_v6_iif(skb);
943         else {
944                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
945                         oif = skb->skb_iif;
946
947                 fl6.flowi6_oif = oif;
948         }
949
950         if (sk) {
951                 if (sk->sk_state == TCP_TIME_WAIT)
952                         mark = inet_twsk(sk)->tw_mark;
953                 else
954                         mark = sk->sk_mark;
955                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
956         }
957         if (txhash) {
958                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
959                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
960         }
961         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
962         fl6.fl6_dport = t1->dest;
963         fl6.fl6_sport = t1->source;
964         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
965         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
966
967         /* Pass a socket to ip6_dst_lookup even when it is for a RST.
968          * The underlying function will use it to retrieve the network
969          * namespace.
970          */
971         if (sk && sk->sk_state != TCP_TIME_WAIT)
972                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
973         else
974                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
975         if (!IS_ERR(dst)) {
976                 skb_dst_set(buff, dst);
977                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
978                          tclass & ~INET_ECN_MASK, priority);
979                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
980                 if (rst)
981                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
982                 return;
983         }
984
985         kfree_skb(buff);
986 }
987
988 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
989 {
990         const struct tcphdr *th = tcp_hdr(skb);
991         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
992         u32 seq = 0, ack_seq = 0;
993         struct tcp_md5sig_key *key = NULL;
994 #ifdef CONFIG_TCP_MD5SIG
995         const __u8 *hash_location = NULL;
996         unsigned char newhash[16];
997         int genhash;
998         struct sock *sk1 = NULL;
999 #endif
1000         __be32 label = 0;
1001         u32 priority = 0;
1002         struct net *net;
1003         int oif = 0;
1004
1005         if (th->rst)
1006                 return;
1007
1008         /* If sk is not NULL, it means we did a successful lookup and the
1009          * incoming route had to be correct. prequeue might have dropped our dst.
1010          */
1011         if (!sk && !ipv6_unicast_destination(skb))
1012                 return;
1013
1014         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1015 #ifdef CONFIG_TCP_MD5SIG
1016         rcu_read_lock();
1017         hash_location = tcp_parse_md5sig_option(th);
1018         if (sk && sk_fullsock(sk)) {
1019                 int l3index;
1020
1021                 /* sdif set, means packet ingressed via a device
1022                  * in an L3 domain and inet_iif is set to it.
1023                  */
1024                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1025                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1026         } else if (hash_location) {
1027                 int dif = tcp_v6_iif_l3_slave(skb);
1028                 int sdif = tcp_v6_sdif(skb);
1029                 int l3index;
1030
1031                 /*
1032                  * The active side is lost. Try to find the listening socket
1033                  * through the source port, and then find the md5 key through
1034                  * the listening socket. We do not lose security here:
1035                  * the incoming packet is checked against the md5 hash of the
1036                  * key found, and no RST is generated if the hash doesn't match.
1037                  */
1038                 sk1 = inet6_lookup_listener(net,
1039                                            &tcp_hashinfo, NULL, 0,
1040                                            &ipv6h->saddr,
1041                                            th->source, &ipv6h->daddr,
1042                                            ntohs(th->source), dif, sdif);
1043                 if (!sk1)
1044                         goto out;
1045
1046                 /* sdif set, means packet ingressed via a device
1047                  * in an L3 domain and dif is set to it.
1048                  */
1049                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1050
1051                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1052                 if (!key)
1053                         goto out;
1054
1055                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1056                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1057                         goto out;
1058         }
1059 #endif
1060
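	/* RFC 793 reset generation: if the incoming segment carried an ACK,
	 * the RST reuses that ACK value as its sequence number; otherwise
	 * the RST has SEQ=0 and acknowledges SEG.SEQ + SEG.LEN, with SYN
	 * and FIN each counting as one.
	 */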
1061         if (th->ack)
1062                 seq = ntohl(th->ack_seq);
1063         else
1064                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1065                           (th->doff << 2);
1066
1067         if (sk) {
1068                 oif = sk->sk_bound_dev_if;
1069                 if (sk_fullsock(sk)) {
1070                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1071
1072                         trace_tcp_send_reset(sk, skb);
1073                         if (np->repflow)
1074                                 label = ip6_flowlabel(ipv6h);
1075                         priority = sk->sk_priority;
1076                 }
1077                 if (sk->sk_state == TCP_TIME_WAIT) {
1078                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1079                         priority = inet_twsk(sk)->tw_priority;
1080                 }
1081         } else {
1082                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1083                         label = ip6_flowlabel(ipv6h);
1084         }
1085
1086         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1087                              ipv6_get_dsfield(ipv6h), label, priority, 0);
1088
1089 #ifdef CONFIG_TCP_MD5SIG
1090 out:
1091         rcu_read_unlock();
1092 #endif
1093 }
1094
1095 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1096                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1097                             struct tcp_md5sig_key *key, u8 tclass,
1098                             __be32 label, u32 priority, u32 txhash)
1099 {
1100         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1101                              tclass, label, priority, txhash);
1102 }
1103
1104 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1105 {
1106         struct inet_timewait_sock *tw = inet_twsk(sk);
1107         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1108
1109         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1110                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1111                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1112                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1113                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1114                         tw->tw_txhash);
1115
1116         inet_twsk_put(tw);
1117 }
1118
1119 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1120                                   struct request_sock *req)
1121 {
1122         int l3index;
1123
1124         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1125
1126         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1127          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1128          */
1129         /* RFC 7323 2.3
1130          * The window field (SEG.WND) of every outgoing segment, with the
1131          * exception of <SYN> segments, MUST be right-shifted by
1132          * Rcv.Wind.Shift bits:
1133          */
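	/* e.g. with Rcv.Wind.Shift == 7, a 65536-byte window is sent as 512 */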
1134         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1135                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1136                         tcp_rsk(req)->rcv_nxt,
1137                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1138                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1139                         req->ts_recent, sk->sk_bound_dev_if,
1140                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1141                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
1142                         tcp_rsk(req)->txhash);
1143 }
1144
1145
1146 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1147 {
1148 #ifdef CONFIG_SYN_COOKIES
1149         const struct tcphdr *th = tcp_hdr(skb);
1150
1151         if (!th->syn)
1152                 sk = cookie_v6_check(sk, skb);
1153 #endif
1154         return sk;
1155 }
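
/* On a listener, a non-SYN segment can only be the last ACK of a handshake
 * whose SYN was answered with a syncookie; cookie_v6_check() validates the
 * cookie and, when it is good, hands back a newly created child socket in
 * place of the listener.
 */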
1156
1157 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1158                          struct tcphdr *th, u32 *cookie)
1159 {
1160         u16 mss = 0;
1161 #ifdef CONFIG_SYN_COOKIES
1162         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1163                                     &tcp_request_sock_ipv6_ops, sk, th);
1164         if (mss) {
1165                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1166                 tcp_synq_overflow(sk);
1167         }
1168 #endif
1169         return mss;
1170 }
1171
1172 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1173 {
1174         if (skb->protocol == htons(ETH_P_IP))
1175                 return tcp_v4_conn_request(sk, skb);
1176
1177         if (!ipv6_unicast_destination(skb))
1178                 goto drop;
1179
1180         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1181                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1182                 return 0;
1183         }
1184
1185         return tcp_conn_request(&tcp6_request_sock_ops,
1186                                 &tcp_request_sock_ipv6_ops, sk, skb);
1187
1188 drop:
1189         tcp_listendrop(sk);
1190         return 0; /* don't send reset */
1191 }
1192
1193 static void tcp_v6_restore_cb(struct sk_buff *skb)
1194 {
1195         /* We need to move header back to the beginning if xfrm6_policy_check()
1196          * and tcp_v6_fill_cb() are going to be called again.
1197          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1198          */
1199         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1200                 sizeof(struct inet6_skb_parm));
1201 }
1202
1203 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1204                                          struct request_sock *req,
1205                                          struct dst_entry *dst,
1206                                          struct request_sock *req_unhash,
1207                                          bool *own_req)
1208 {
1209         struct inet_request_sock *ireq;
1210         struct ipv6_pinfo *newnp;
1211         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1212         struct ipv6_txoptions *opt;
1213         struct inet_sock *newinet;
1214         bool found_dup_sk = false;
1215         struct tcp_sock *newtp;
1216         struct sock *newsk;
1217 #ifdef CONFIG_TCP_MD5SIG
1218         struct tcp_md5sig_key *key;
1219         int l3index;
1220 #endif
1221         struct flowi6 fl6;
1222
1223         if (skb->protocol == htons(ETH_P_IP)) {
1224                 /*
1225                  *      v6 mapped
1226                  */
1227
1228                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1229                                              req_unhash, own_req);
1230
1231                 if (!newsk)
1232                         return NULL;
1233
1234                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1235
1236                 newnp = tcp_inet6_sk(newsk);
1237                 newtp = tcp_sk(newsk);
1238
1239                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1240
1241                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1242
1243                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1244                 if (sk_is_mptcp(newsk))
1245                         mptcpv6_handle_mapped(newsk, true);
1246                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1247 #ifdef CONFIG_TCP_MD5SIG
1248                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1249 #endif
1250
1251                 newnp->ipv6_mc_list = NULL;
1252                 newnp->ipv6_ac_list = NULL;
1253                 newnp->ipv6_fl_list = NULL;
1254                 newnp->pktoptions  = NULL;
1255                 newnp->opt         = NULL;
1256                 newnp->mcast_oif   = inet_iif(skb);
1257                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1258                 newnp->rcv_flowinfo = 0;
1259                 if (np->repflow)
1260                         newnp->flow_label = 0;
1261
1262                 /*
1263                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1264                  * here, tcp_create_openreq_child now does this for us, see the comment in
1265                  * that function for the gory details. -acme
1266                  */
1267
1268                 /* It is a tricky place. Until this moment the IPv4 tcp
1269                    code worked with the IPv6 icsk.icsk_af_ops.
1270                    Sync it now.
1271                  */
1272                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1273
1274                 return newsk;
1275         }
1276
1277         ireq = inet_rsk(req);
1278
1279         if (sk_acceptq_is_full(sk))
1280                 goto out_overflow;
1281
1282         if (!dst) {
1283                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1284                 if (!dst)
1285                         goto out;
1286         }
1287
1288         newsk = tcp_create_openreq_child(sk, req, skb);
1289         if (!newsk)
1290                 goto out_nonewsk;
1291
1292         /*
1293          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1294          * count here, tcp_create_openreq_child now does this for us, see the
1295          * comment in that function for the gory details. -acme
1296          */
1297
1298         newsk->sk_gso_type = SKB_GSO_TCPV6;
1299         ip6_dst_store(newsk, dst, NULL, NULL);
1300         inet6_sk_rx_dst_set(newsk, skb);
1301
1302         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1303
1304         newtp = tcp_sk(newsk);
1305         newinet = inet_sk(newsk);
1306         newnp = tcp_inet6_sk(newsk);
1307
1308         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1309
1310         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1311         newnp->saddr = ireq->ir_v6_loc_addr;
1312         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1313         newsk->sk_bound_dev_if = ireq->ir_iif;
1314
1315         /* Now IPv6 options...
1316
1317            First: no IPv4 options.
1318          */
1319         newinet->inet_opt = NULL;
1320         newnp->ipv6_mc_list = NULL;
1321         newnp->ipv6_ac_list = NULL;
1322         newnp->ipv6_fl_list = NULL;
1323
1324         /* Clone RX bits */
1325         newnp->rxopt.all = np->rxopt.all;
1326
1327         newnp->pktoptions = NULL;
1328         newnp->opt        = NULL;
1329         newnp->mcast_oif  = tcp_v6_iif(skb);
1330         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1331         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1332         if (np->repflow)
1333                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1334
1335         /* Set ToS of the new socket based upon the value of incoming SYN.
1336          * ECT bits are set later in tcp_init_transfer().
1337          */
1338         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1339                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1340
1341         /* Clone native IPv6 options from the listening socket (if any).
1342 
1343            Yes, keeping a reference count would be much more clever,
1344            but we do one more thing here: reattach optmem
1345            to newsk.
1346          */
1347         opt = ireq->ipv6_opt;
1348         if (!opt)
1349                 opt = rcu_dereference(np->opt);
1350         if (opt) {
1351                 opt = ipv6_dup_options(newsk, opt);
1352                 RCU_INIT_POINTER(newnp->opt, opt);
1353         }
1354         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1355         if (opt)
1356                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1357                                                     opt->opt_flen;
1358
1359         tcp_ca_openreq_child(newsk, dst);
1360
1361         tcp_sync_mss(newsk, dst_mtu(dst));
1362         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1363
1364         tcp_initialize_rcv_mss(newsk);
1365
1366         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1367         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1368
1369 #ifdef CONFIG_TCP_MD5SIG
1370         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1371
1372         /* Copy over the MD5 key from the original socket */
1373         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1374         if (key) {
1375                 /* We're using one, so create a matching key
1376                  * on the newsk structure. If we fail to get
1377                  * memory, then we end up not copying the key
1378                  * across. Shucks.
1379                  */
1380                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1381                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1382                                sk_gfp_mask(sk, GFP_ATOMIC));
1383         }
1384 #endif
1385
1386         if (__inet_inherit_port(sk, newsk) < 0) {
1387                 inet_csk_prepare_forced_close(newsk);
1388                 tcp_done(newsk);
1389                 goto out;
1390         }
1391         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1392                                        &found_dup_sk);
1393         if (*own_req) {
1394                 tcp_move_syn(newtp, req);
1395
1396                 /* Clone pktoptions received with SYN, if we own the req */
1397                 if (ireq->pktopts) {
1398                         newnp->pktoptions = skb_clone(ireq->pktopts,
1399                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1400                         consume_skb(ireq->pktopts);
1401                         ireq->pktopts = NULL;
1402                         if (newnp->pktoptions) {
1403                                 tcp_v6_restore_cb(newnp->pktoptions);
1404                                 skb_set_owner_r(newnp->pktoptions, newsk);
1405                         }
1406                 }
1407         } else {
1408                 if (!req_unhash && found_dup_sk) {
1409                         /* This code path should only be executed in the
1410                          * syncookie case
1411                          */
1412                         bh_unlock_sock(newsk);
1413                         sock_put(newsk);
1414                         newsk = NULL;
1415                 }
1416         }
1417
1418         return newsk;
1419
1420 out_overflow:
1421         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1422 out_nonewsk:
1423         dst_release(dst);
1424 out:
1425         tcp_listendrop(sk);
1426         return NULL;
1427 }
1428
1429 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1430                                                            u32));
1431 /* The socket must have its spinlock held when we get
1432  * here, unless it is a TCP_LISTEN socket.
1433  *
1434  * We have a potential double-lock case here, so even when
1435  * doing backlog processing we use the BH locking scheme.
1436  * This is because we cannot sleep with the original spinlock
1437  * held.
1438  */
1439 INDIRECT_CALLABLE_SCOPE
1440 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1441 {
1442         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1443         struct sk_buff *opt_skb = NULL;
1444         enum skb_drop_reason reason;
1445         struct tcp_sock *tp;
1446
1447         /* Imagine: socket is IPv6. IPv4 packet arrives,
1448            goes to the IPv4 receive handler and is backlogged.
1449            From the backlog it always goes here. Kerboom...
1450            Fortunately, tcp_rcv_established and rcv_established
1451            handle them correctly, but it is not the case with
1452            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1453          */
1454
1455         if (skb->protocol == htons(ETH_P_IP))
1456                 return tcp_v4_do_rcv(sk, skb);
1457
1458         /*
1459          *      socket locking is here for SMP purposes as backlog rcv
1460          *      is currently called with bh processing disabled.
1461          */
1462
1463         /* Do Stevens' IPV6_PKTOPTIONS.
1464
1465            Yes, guys, this is the only place in our code where we
1466            can handle it without affecting IPv4.
1467            The rest of the code is protocol independent,
1468            and I do not like the idea of uglifying IPv4.
1469
1470            Actually, the whole idea behind IPV6_PKTOPTIONS
1471            does not look very well thought out. For now we latch
1472            the options received in the last packet enqueued
1473            by tcp. Feel free to propose a better solution.
1474                                                --ANK (980728)
1475          */
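        /* For reference, userspace opts in to this latching per option,
         * e.g. (illustrative userspace snippet, not kernel code):
         *
         *      int on = 1;
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *
         * which sets np->rxopt.bits.rxinfo, making np->rxopt.all nonzero
         * and triggering the clone below.
         */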
1476         if (np->rxopt.all)
1477                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1478
1479         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1480         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1481                 struct dst_entry *dst;
1482
1483                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1484                                                 lockdep_sock_is_held(sk));
1485
1486                 sock_rps_save_rxhash(sk, skb);
1487                 sk_mark_napi_id(sk, skb);
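                /* Validate the cached rx dst: it must still belong to the
                 * incoming device and pass its cookie check, otherwise it
                 * is dropped and a fresh one will be installed later.
                 */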
1488                 if (dst) {
1489                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1490                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1491                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1492                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1493                                 dst_release(dst);
1494                         }
1495                 }
1496
1497                 tcp_rcv_established(sk, skb);
1498                 if (opt_skb)
1499                         goto ipv6_pktoptions;
1500                 return 0;
1501         }
1502
1503         if (tcp_checksum_complete(skb))
1504                 goto csum_err;
1505
1506         if (sk->sk_state == TCP_LISTEN) {
1507                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1508
1509                 if (!nsk)
1510                         goto discard;
1511
1512                 if (nsk != sk) {
1513                         if (tcp_child_process(sk, nsk, skb))
1514                                 goto reset;
1515                         if (opt_skb)
1516                                 __kfree_skb(opt_skb);
1517                         return 0;
1518                 }
1519         } else
1520                 sock_rps_save_rxhash(sk, skb);
1521
1522         if (tcp_rcv_state_process(sk, skb))
1523                 goto reset;
1524         if (opt_skb)
1525                 goto ipv6_pktoptions;
1526         return 0;
1527
1528 reset:
1529         tcp_v6_send_reset(sk, skb);
1530 discard:
1531         if (opt_skb)
1532                 __kfree_skb(opt_skb);
1533         kfree_skb_reason(skb, reason);
1534         return 0;
1535 csum_err:
1536         reason = SKB_DROP_REASON_TCP_CSUM;
1537         trace_tcp_bad_csum(skb);
1538         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1539         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1540         goto discard;
1541
1542
1543 ipv6_pktoptions:
1544         /* What is this for? We latch the options below only when:
1545
1546            1. the skb was enqueued by tcp;
1547            2. the skb was added to the tail of the read queue, not out of order;
1548            3. the socket is not in a passive state;
1549            4. finally, it really contains options which the user wants to receive.
1550          */
1551         tp = tcp_sk(sk);
1552         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1553             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1554                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1555                         np->mcast_oif = tcp_v6_iif(opt_skb);
1556                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1557                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1558                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1559                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1560                 if (np->repflow)
1561                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1562                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1563                         skb_set_owner_r(opt_skb, sk);
1564                         tcp_v6_restore_cb(opt_skb);
1565                         opt_skb = xchg(&np->pktoptions, opt_skb);
1566                 } else {
1567                         __kfree_skb(opt_skb);
1568                         opt_skb = xchg(&np->pktoptions, NULL);
1569                 }
1570         }
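        /* After the xchg() above, opt_skb holds the previously latched
         * pktoptions (or NULL); that reference is released below. xchg()
         * keeps the swap atomic with respect to any concurrent reader of
         * np->pktoptions.
         */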
1571
1572         consume_skb(opt_skb);
1573         return 0;
1574 }
1575
1576 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1577                            const struct tcphdr *th)
1578 {
1579         /* This is tricky: we move IP6CB to its correct location inside
1580          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1581          * _decode_session6() uses IP6CB().
1582          * barrier() makes sure the compiler won't play aliasing games.
1583          */
1584         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1585                 sizeof(struct inet6_skb_parm));
1586         barrier();
1587
1588         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1589         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1590                                     skb->len - th->doff*4);
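        /* SYN and FIN each consume one sequence number, hence the th->syn
         * and th->fin terms: e.g. (hypothetical numbers) seq 1000 with 100
         * payload bytes and FIN set gives end_seq 1101.
         */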
1591         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1592         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1593         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1594         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1595         TCP_SKB_CB(skb)->sacked = 0;
1596         TCP_SKB_CB(skb)->has_rxtstamp =
1597                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1598 }
1599
1600 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1601 {
1602         enum skb_drop_reason drop_reason;
1603         int sdif = inet6_sdif(skb);
1604         int dif = inet6_iif(skb);
1605         const struct tcphdr *th;
1606         const struct ipv6hdr *hdr;
1607         bool refcounted;
1608         struct sock *sk;
1609         int ret;
1610         struct net *net = dev_net(skb->dev);
1611
1612         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1613         if (skb->pkt_type != PACKET_HOST)
1614                 goto discard_it;
1615
1616         /*
1617          *      Count it even if it's bad.
1618          */
1619         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1620
1621         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1622                 goto discard_it;
1623
1624         th = (const struct tcphdr *)skb->data;
1625
1626         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1627                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1628                 goto bad_packet;
1629         }
1630         if (!pskb_may_pull(skb, th->doff*4))
1631                 goto discard_it;
1632
1633         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1634                 goto csum_error;
1635
1636         th = (const struct tcphdr *)skb->data;
1637         hdr = ipv6_hdr(skb);
1638
1639 lookup:
1640         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1641                                 th->source, th->dest, inet6_iif(skb), sdif,
1642                                 &refcounted);
1643         if (!sk)
1644                 goto no_tcp_socket;
1645
1646 process:
1647         if (sk->sk_state == TCP_TIME_WAIT)
1648                 goto do_time_wait;
1649
1650         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1651                 struct request_sock *req = inet_reqsk(sk);
1652                 bool req_stolen = false;
1653                 struct sock *nsk;
1654
1655                 sk = req->rsk_listener;
1656                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1657                                                    &hdr->saddr, &hdr->daddr,
1658                                                    AF_INET6, dif, sdif);
1659                 if (drop_reason) {
1660                         sk_drops_add(sk, skb);
1661                         reqsk_put(req);
1662                         goto discard_it;
1663                 }
1664                 if (tcp_checksum_complete(skb)) {
1665                         reqsk_put(req);
1666                         goto csum_error;
1667                 }
1668                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1669                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1670                         if (!nsk) {
1671                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1672                                 goto lookup;
1673                         }
1674                         sk = nsk;
1675                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1676                          * reference before returning.
1677                          */
1678                 } else {
1679                         sock_hold(sk);
1680                 }
1681                 refcounted = true;
1682                 nsk = NULL;
1683                 if (!tcp_filter(sk, skb)) {
1684                         th = (const struct tcphdr *)skb->data;
1685                         hdr = ipv6_hdr(skb);
1686                         tcp_v6_fill_cb(skb, hdr, th);
1687                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1688                 } else {
1689                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1690                 }
1691                 if (!nsk) {
1692                         reqsk_put(req);
1693                         if (req_stolen) {
1694                                 /* Another cpu got exclusive access to req
1695                                  * and created a full blown socket.
1696                                  * Try to feed this packet to this socket
1697                                  * instead of discarding it.
1698                                  */
1699                                 tcp_v6_restore_cb(skb);
1700                                 sock_put(sk);
1701                                 goto lookup;
1702                         }
1703                         goto discard_and_relse;
1704                 }
1705                 if (nsk == sk) {
1706                         reqsk_put(req);
1707                         tcp_v6_restore_cb(skb);
1708                 } else if (tcp_child_process(sk, nsk, skb)) {
1709                         tcp_v6_send_reset(nsk, skb);
1710                         goto discard_and_relse;
1711                 } else {
1712                         sock_put(sk);
1713                         return 0;
1714                 }
1715         }
1716
1717         if (static_branch_unlikely(&ip6_min_hopcount)) {
1718                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1719                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1720                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1721                         goto discard_and_relse;
1722                 }
1723         }
1724
1725         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1726                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1727                 goto discard_and_relse;
1728         }
1729
1730         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1731                                            AF_INET6, dif, sdif);
1732         if (drop_reason)
1733                 goto discard_and_relse;
1734
1735         if (tcp_filter(sk, skb)) {
1736                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1737                 goto discard_and_relse;
1738         }
1739         th = (const struct tcphdr *)skb->data;
1740         hdr = ipv6_hdr(skb);
1741         tcp_v6_fill_cb(skb, hdr, th);
1742
1743         skb->dev = NULL;
1744
1745         if (sk->sk_state == TCP_LISTEN) {
1746                 ret = tcp_v6_do_rcv(sk, skb);
1747                 goto put_and_return;
1748         }
1749
1750         sk_incoming_cpu_update(sk);
1751
1752         bh_lock_sock_nested(sk);
1753         tcp_segs_in(tcp_sk(sk), skb);
1754         ret = 0;
1755         if (!sock_owned_by_user(sk)) {
1756                 ret = tcp_v6_do_rcv(sk, skb);
1757         } else {
1758                 if (tcp_add_backlog(sk, skb, &drop_reason))
1759                         goto discard_and_relse;
1760         }
1761         bh_unlock_sock(sk);
1762 put_and_return:
1763         if (refcounted)
1764                 sock_put(sk);
1765         return ret ? -1 : 0;
1766
1767 no_tcp_socket:
1768         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1769         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1770                 goto discard_it;
1771
1772         tcp_v6_fill_cb(skb, hdr, th);
1773
1774         if (tcp_checksum_complete(skb)) {
1775 csum_error:
1776                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1777                 trace_tcp_bad_csum(skb);
1778                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1779 bad_packet:
1780                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1781         } else {
1782                 tcp_v6_send_reset(NULL, skb);
1783         }
1784
1785 discard_it:
1786         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1787         kfree_skb_reason(skb, drop_reason);
1788         return 0;
1789
1790 discard_and_relse:
1791         sk_drops_add(sk, skb);
1792         if (refcounted)
1793                 sock_put(sk);
1794         goto discard_it;
1795
1796 do_time_wait:
1797         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1798                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1799                 inet_twsk_put(inet_twsk(sk));
1800                 goto discard_it;
1801         }
1802
1803         tcp_v6_fill_cb(skb, hdr, th);
1804
1805         if (tcp_checksum_complete(skb)) {
1806                 inet_twsk_put(inet_twsk(sk));
1807                 goto csum_error;
1808         }
1809
1810         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1811         case TCP_TW_SYN:
1812         {
1813                 struct sock *sk2;
1814
1815                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1816                                             skb, __tcp_hdrlen(th),
1817                                             &ipv6_hdr(skb)->saddr, th->source,
1818                                             &ipv6_hdr(skb)->daddr,
1819                                             ntohs(th->dest),
1820                                             tcp_v6_iif_l3_slave(skb),
1821                                             sdif);
1822                 if (sk2) {
1823                         struct inet_timewait_sock *tw = inet_twsk(sk);
1824                         inet_twsk_deschedule_put(tw);
1825                         sk = sk2;
1826                         tcp_v6_restore_cb(skb);
1827                         refcounted = false;
1828                         goto process;
1829                 }
1830         }
1831                 /* to ACK */
1832                 fallthrough;
1833         case TCP_TW_ACK:
1834                 tcp_v6_timewait_ack(sk, skb);
1835                 break;
1836         case TCP_TW_RST:
1837                 tcp_v6_send_reset(sk, skb);
1838                 inet_twsk_deschedule_put(inet_twsk(sk));
1839                 goto discard_it;
1840         case TCP_TW_SUCCESS:
1841                 ;
1842         }
1843         goto discard_it;
1844 }
1845
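/* Early demux: find an established socket before the route lookup so
 * that its cached rx dst (if still valid) can be attached to the skb,
 * saving a routing lookup on the hot path.
 */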
1846 void tcp_v6_early_demux(struct sk_buff *skb)
1847 {
1848         const struct ipv6hdr *hdr;
1849         const struct tcphdr *th;
1850         struct sock *sk;
1851
1852         if (skb->pkt_type != PACKET_HOST)
1853                 return;
1854
1855         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1856                 return;
1857
1858         hdr = ipv6_hdr(skb);
1859         th = tcp_hdr(skb);
1860
1861         if (th->doff < sizeof(struct tcphdr) / 4)
1862                 return;
1863
1864         /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1865         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1866                                         &hdr->saddr, th->source,
1867                                         &hdr->daddr, ntohs(th->dest),
1868                                         inet6_iif(skb), inet6_sdif(skb));
1869         if (sk) {
1870                 skb->sk = sk;
1871                 skb->destructor = sock_edemux;
1872                 if (sk_fullsock(sk)) {
1873                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1874
1875                         if (dst)
1876                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1877                         if (dst &&
1878                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1879                                 skb_dst_set_noref(skb, dst);
1880                 }
1881         }
1882 }
1883
1884 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1885         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1886         .twsk_unique    = tcp_twsk_unique,
1887         .twsk_destructor = tcp_twsk_destructor,
1888 };
1889
1890 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1891 {
1892         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1893 }
1894
1895 const struct inet_connection_sock_af_ops ipv6_specific = {
1896         .queue_xmit        = inet6_csk_xmit,
1897         .send_check        = tcp_v6_send_check,
1898         .rebuild_header    = inet6_sk_rebuild_header,
1899         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1900         .conn_request      = tcp_v6_conn_request,
1901         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1902         .net_header_len    = sizeof(struct ipv6hdr),
1903         .net_frag_header_len = sizeof(struct frag_hdr),
1904         .setsockopt        = ipv6_setsockopt,
1905         .getsockopt        = ipv6_getsockopt,
1906         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1907         .sockaddr_len      = sizeof(struct sockaddr_in6),
1908         .mtu_reduced       = tcp_v6_mtu_reduced,
1909 };
1910
1911 #ifdef CONFIG_TCP_MD5SIG
1912 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1913         .md5_lookup     =       tcp_v6_md5_lookup,
1914         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1915         .md5_parse      =       tcp_v6_parse_md5_keys,
1916 };
1917 #endif
1918
1919 /*
1920  *      TCP over IPv4 via INET6 API
1921  */
1922 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1923         .queue_xmit        = ip_queue_xmit,
1924         .send_check        = tcp_v4_send_check,
1925         .rebuild_header    = inet_sk_rebuild_header,
1926         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1927         .conn_request      = tcp_v6_conn_request,
1928         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1929         .net_header_len    = sizeof(struct iphdr),
1930         .setsockopt        = ipv6_setsockopt,
1931         .getsockopt        = ipv6_getsockopt,
1932         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1933         .sockaddr_len      = sizeof(struct sockaddr_in6),
1934         .mtu_reduced       = tcp_v4_mtu_reduced,
1935 };
1936
1937 #ifdef CONFIG_TCP_MD5SIG
1938 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1939         .md5_lookup     =       tcp_v4_md5_lookup,
1940         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1941         .md5_parse      =       tcp_v6_parse_md5_keys,
1942 };
1943 #endif
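/* Note: ipv6_mapped (and the mapped MD5 ops above) are installed from
 * tcp_v6_connect() when the destination is an IPv4-mapped address
 * (::ffff:a.b.c.d), so an AF_INET6 socket transparently carries IPv4
 * traffic.
 */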
1944
1945 /* NOTE: A lot of things are set to zero explicitly by the call to
1946  *       sk_alloc(), so they need not be done here.
1947  */
1948 static int tcp_v6_init_sock(struct sock *sk)
1949 {
1950         struct inet_connection_sock *icsk = inet_csk(sk);
1951
1952         tcp_init_sock(sk);
1953
1954         icsk->icsk_af_ops = &ipv6_specific;
1955
1956 #ifdef CONFIG_TCP_MD5SIG
1957         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1958 #endif
1959
1960         return 0;
1961 }
1962
1963 static void tcp_v6_destroy_sock(struct sock *sk)
1964 {
1965         tcp_v4_destroy_sock(sk);
1966         inet6_destroy_sock(sk);
1967 }
1968
1969 #ifdef CONFIG_PROC_FS
1970 /* Proc filesystem TCPv6 sock list dumping. */
1971 static void get_openreq6(struct seq_file *seq,
1972                          const struct request_sock *req, int i)
1973 {
1974         long ttd = req->rsk_timer.expires - jiffies;
1975         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1976         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1977
1978         if (ttd < 0)
1979                 ttd = 0;
1980
1981         seq_printf(seq,
1982                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1983                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1984                    i,
1985                    src->s6_addr32[0], src->s6_addr32[1],
1986                    src->s6_addr32[2], src->s6_addr32[3],
1987                    inet_rsk(req)->ir_num,
1988                    dest->s6_addr32[0], dest->s6_addr32[1],
1989                    dest->s6_addr32[2], dest->s6_addr32[3],
1990                    ntohs(inet_rsk(req)->ir_rmt_port),
1991                    TCP_SYN_RECV,
1992                    0, 0, /* could print option size, but that is af dependent. */
1993                    1,   /* timers active (only the expire timer) */
1994                    jiffies_to_clock_t(ttd),
1995                    req->num_timeout,
1996                    from_kuid_munged(seq_user_ns(seq),
1997                                     sock_i_uid(req->rsk_listener)),
1998                    0,  /* non standard timer */
1999                    0, /* open_requests have no inode */
2000                    0, req);
2001 }
2002
2003 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2004 {
2005         const struct in6_addr *dest, *src;
2006         __u16 destp, srcp;
2007         int timer_active;
2008         unsigned long timer_expires;
2009         const struct inet_sock *inet = inet_sk(sp);
2010         const struct tcp_sock *tp = tcp_sk(sp);
2011         const struct inet_connection_sock *icsk = inet_csk(sp);
2012         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2013         int rx_queue;
2014         int state;
2015
2016         dest  = &sp->sk_v6_daddr;
2017         src   = &sp->sk_v6_rcv_saddr;
2018         destp = ntohs(inet->inet_dport);
2019         srcp  = ntohs(inet->inet_sport);
2020
2021         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2022             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2023             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2024                 timer_active    = 1;
2025                 timer_expires   = icsk->icsk_timeout;
2026         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2027                 timer_active    = 4;
2028                 timer_expires   = icsk->icsk_timeout;
2029         } else if (timer_pending(&sp->sk_timer)) {
2030                 timer_active    = 2;
2031                 timer_expires   = sp->sk_timer.expires;
2032         } else {
2033                 timer_active    = 0;
2034                 timer_expires = jiffies;
2035         }
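        /* timer_active becomes the "tr" column of /proc/net/tcp6:
         * 0 none, 1 retransmit-class timers, 2 keepalive (sk_timer),
         * 4 zero-window probe; 3 is used for TIME_WAIT sockets in
         * get_timewait6_sock().
         */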
2036
2037         state = inet_sk_state_load(sp);
2038         if (state == TCP_LISTEN)
2039                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2040         else
2041                 /* Because we don't lock the socket,
2042                  * we might find a transient negative value.
2043                  */
2044                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2045                                       READ_ONCE(tp->copied_seq), 0);
2046
2047         seq_printf(seq,
2048                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2049                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2050                    i,
2051                    src->s6_addr32[0], src->s6_addr32[1],
2052                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2053                    dest->s6_addr32[0], dest->s6_addr32[1],
2054                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2055                    state,
2056                    READ_ONCE(tp->write_seq) - tp->snd_una,
2057                    rx_queue,
2058                    timer_active,
2059                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2060                    icsk->icsk_retransmits,
2061                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2062                    icsk->icsk_probes_out,
2063                    sock_i_ino(sp),
2064                    refcount_read(&sp->sk_refcnt), sp,
2065                    jiffies_to_clock_t(icsk->icsk_rto),
2066                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2067                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2068                    tcp_snd_cwnd(tp),
2069                    state == TCP_LISTEN ?
2070                         fastopenq->max_qlen :
2071                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2072                    );
2073 }
2074
2075 static void get_timewait6_sock(struct seq_file *seq,
2076                                struct inet_timewait_sock *tw, int i)
2077 {
2078         long delta = tw->tw_timer.expires - jiffies;
2079         const struct in6_addr *dest, *src;
2080         __u16 destp, srcp;
2081
2082         dest = &tw->tw_v6_daddr;
2083         src  = &tw->tw_v6_rcv_saddr;
2084         destp = ntohs(tw->tw_dport);
2085         srcp  = ntohs(tw->tw_sport);
2086
2087         seq_printf(seq,
2088                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2089                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2090                    i,
2091                    src->s6_addr32[0], src->s6_addr32[1],
2092                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2093                    dest->s6_addr32[0], dest->s6_addr32[1],
2094                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2095                    tw->tw_substate, 0, 0,
2096                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2097                    refcount_read(&tw->tw_refcnt), tw);
2098 }
2099
2100 static int tcp6_seq_show(struct seq_file *seq, void *v)
2101 {
2102         struct tcp_iter_state *st;
2103         struct sock *sk = v;
2104
2105         if (v == SEQ_START_TOKEN) {
2106                 seq_puts(seq,
2107                          "  sl  "
2108                          "local_address                         "
2109                          "remote_address                        "
2110                          "st tx_queue rx_queue tr tm->when retrnsmt"
2111                          "   uid  timeout inode\n");
2112                 goto out;
2113         }
2114         st = seq->private;
2115
2116         if (sk->sk_state == TCP_TIME_WAIT)
2117                 get_timewait6_sock(seq, v, st->num);
2118         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2119                 get_openreq6(seq, v, st->num);
2120         else
2121                 get_tcp6_sock(seq, v, st->num);
2122 out:
2123         return 0;
2124 }
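/* The table is consumed like any seq_file, e.g. `cat /proc/net/tcp6`,
 * one line per request, established or timewait socket in the formats
 * built above.
 */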
2125
2126 static const struct seq_operations tcp6_seq_ops = {
2127         .show           = tcp6_seq_show,
2128         .start          = tcp_seq_start,
2129         .next           = tcp_seq_next,
2130         .stop           = tcp_seq_stop,
2131 };
2132
2133 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2134         .family         = AF_INET6,
2135 };
2136
2137 int __net_init tcp6_proc_init(struct net *net)
2138 {
2139         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2140                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2141                 return -ENOMEM;
2142         return 0;
2143 }
2144
2145 void tcp6_proc_exit(struct net *net)
2146 {
2147         remove_proc_entry("tcp6", net->proc_net);
2148 }
2149 #endif
2150
2151 struct proto tcpv6_prot = {
2152         .name                   = "TCPv6",
2153         .owner                  = THIS_MODULE,
2154         .close                  = tcp_close,
2155         .pre_connect            = tcp_v6_pre_connect,
2156         .connect                = tcp_v6_connect,
2157         .disconnect             = tcp_disconnect,
2158         .accept                 = inet_csk_accept,
2159         .ioctl                  = tcp_ioctl,
2160         .init                   = tcp_v6_init_sock,
2161         .destroy                = tcp_v6_destroy_sock,
2162         .shutdown               = tcp_shutdown,
2163         .setsockopt             = tcp_setsockopt,
2164         .getsockopt             = tcp_getsockopt,
2165         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2166         .keepalive              = tcp_set_keepalive,
2167         .recvmsg                = tcp_recvmsg,
2168         .sendmsg                = tcp_sendmsg,
2169         .sendpage               = tcp_sendpage,
2170         .backlog_rcv            = tcp_v6_do_rcv,
2171         .release_cb             = tcp_release_cb,
2172         .hash                   = inet6_hash,
2173         .unhash                 = inet_unhash,
2174         .get_port               = inet_csk_get_port,
2175         .put_port               = inet_put_port,
2176 #ifdef CONFIG_BPF_SYSCALL
2177         .psock_update_sk_prot   = tcp_bpf_update_proto,
2178 #endif
2179         .enter_memory_pressure  = tcp_enter_memory_pressure,
2180         .leave_memory_pressure  = tcp_leave_memory_pressure,
2181         .stream_memory_free     = tcp_stream_memory_free,
2182         .sockets_allocated      = &tcp_sockets_allocated,
2183
2184         .memory_allocated       = &tcp_memory_allocated,
2185         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2186
2187         .memory_pressure        = &tcp_memory_pressure,
2188         .orphan_count           = &tcp_orphan_count,
2189         .sysctl_mem             = sysctl_tcp_mem,
2190         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2191         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2192         .max_header             = MAX_TCP_HEADER,
2193         .obj_size               = sizeof(struct tcp6_sock),
2194         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2195         .twsk_prot              = &tcp6_timewait_sock_ops,
2196         .rsk_prot               = &tcp6_request_sock_ops,
2197         .h.hashinfo             = &tcp_hashinfo,
2198         .no_autobind            = true,
2199         .diag_destroy           = tcp_abort,
2200 };
2201 EXPORT_SYMBOL_GPL(tcpv6_prot);
2202
2203 static const struct inet6_protocol tcpv6_protocol = {
2204         .handler        =       tcp_v6_rcv,
2205         .err_handler    =       tcp_v6_err,
2206         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2207 };
2208
2209 static struct inet_protosw tcpv6_protosw = {
2210         .type           =       SOCK_STREAM,
2211         .protocol       =       IPPROTO_TCP,
2212         .prot           =       &tcpv6_prot,
2213         .ops            =       &inet6_stream_ops,
2214         .flags          =       INET_PROTOSW_PERMANENT |
2215                                 INET_PROTOSW_ICSK,
2216 };
2217
2218 static int __net_init tcpv6_net_init(struct net *net)
2219 {
2220         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2221                                     SOCK_RAW, IPPROTO_TCP, net);
2222 }
2223
2224 static void __net_exit tcpv6_net_exit(struct net *net)
2225 {
2226         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2227 }
2228
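/* Called once per batch of exiting namespaces: purge any IPv6 timewait
 * sockets belonging to a dying netns so they do not hold it alive.
 */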
2229 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2230 {
2231         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2232 }
2233
2234 static struct pernet_operations tcpv6_net_ops = {
2235         .init       = tcpv6_net_init,
2236         .exit       = tcpv6_net_exit,
2237         .exit_batch = tcpv6_net_exit_batch,
2238 };
2239
2240 int __init tcpv6_init(void)
2241 {
2242         int ret;
2243
2244         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2245         if (ret)
2246                 goto out;
2247
2248         /* register the TCPv6 socket interface (protosw) */
2249         ret = inet6_register_protosw(&tcpv6_protosw);
2250         if (ret)
2251                 goto out_tcpv6_protocol;
2252
2253         ret = register_pernet_subsys(&tcpv6_net_ops);
2254         if (ret)
2255                 goto out_tcpv6_protosw;
2256
2257         ret = mptcpv6_init();
2258         if (ret)
2259                 goto out_tcpv6_pernet_subsys;
2260
2261 out:
2262         return ret;
2263
2264 out_tcpv6_pernet_subsys:
2265         unregister_pernet_subsys(&tcpv6_net_ops);
2266 out_tcpv6_protosw:
2267         inet6_unregister_protosw(&tcpv6_protosw);
2268 out_tcpv6_protocol:
2269         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2270         goto out;
2271 }
2272
2273 void tcpv6_exit(void)
2274 {
2275         unregister_pernet_subsys(&tcpv6_net_ops);
2276         inet6_unregister_protosw(&tcpv6_protosw);
2277         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2278 }