d657713d1c71df880fccfbee891039444a53e750
[platform/kernel/linux-starfive.git] / net / ipv6 / tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
/* Return the address of the inet6 member of a given TCP socket.
 * Inside the TCP stack this can replace inet6_sk(sk): it avoids a
 * dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk)                                                \
        (&container_of_const(tcp_sk(sk), struct tcp6_sock, tcp)->inet6)
98
99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101         struct dst_entry *dst = skb_dst(skb);
102
103         if (dst && dst_hold_safe(dst)) {
104                 const struct rt6_info *rt = (const struct rt6_info *)dst;
105
106                 rcu_assign_pointer(sk->sk_rx_dst, dst);
107                 sk->sk_rx_dst_ifindex = skb->skb_iif;
108                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
109         }
110 }
111
112 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
113 {
114         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
115                                 ipv6_hdr(skb)->saddr.s6_addr32,
116                                 tcp_hdr(skb)->dest,
117                                 tcp_hdr(skb)->source);
118 }
119
120 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
121 {
122         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
123                                    ipv6_hdr(skb)->saddr.s6_addr32);
124 }
125
126 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
127                               int addr_len)
128 {
129         /* This check is replicated from tcp_v6_connect() and intended to
130          * prevent BPF program called below from accessing bytes that are out
131          * of the bound specified by user in addr_len.
132          */
133         if (addr_len < SIN6_LEN_RFC2133)
134                 return -EINVAL;
135
136         sock_owned_by_me(sk);
137
138         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
139 }
140
141 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142                           int addr_len)
143 {
144         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
145         struct inet_connection_sock *icsk = inet_csk(sk);
146         struct in6_addr *saddr = NULL, *final_p, final;
147         struct inet_timewait_death_row *tcp_death_row;
148         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
149         struct inet_sock *inet = inet_sk(sk);
150         struct tcp_sock *tp = tcp_sk(sk);
151         struct net *net = sock_net(sk);
152         struct ipv6_txoptions *opt;
153         struct dst_entry *dst;
154         struct flowi6 fl6;
155         int addr_type;
156         int err;
157
158         if (addr_len < SIN6_LEN_RFC2133)
159                 return -EINVAL;
160
161         if (usin->sin6_family != AF_INET6)
162                 return -EAFNOSUPPORT;
163
164         memset(&fl6, 0, sizeof(fl6));
165
166         if (np->sndflow) {
167                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
168                 IP6_ECN_flow_init(fl6.flowlabel);
169                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
170                         struct ip6_flowlabel *flowlabel;
171                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
172                         if (IS_ERR(flowlabel))
173                                 return -EINVAL;
174                         fl6_sock_release(flowlabel);
175                 }
176         }
177
178         /*
179          *      connect() to INADDR_ANY means loopback (BSD'ism).
180          */
181
182         if (ipv6_addr_any(&usin->sin6_addr)) {
183                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
184                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
185                                                &usin->sin6_addr);
186                 else
187                         usin->sin6_addr = in6addr_loopback;
188         }
189
190         addr_type = ipv6_addr_type(&usin->sin6_addr);
191
192         if (addr_type & IPV6_ADDR_MULTICAST)
193                 return -ENETUNREACH;
194
195         if (addr_type&IPV6_ADDR_LINKLOCAL) {
196                 if (addr_len >= sizeof(struct sockaddr_in6) &&
197                     usin->sin6_scope_id) {
198                         /* If interface is set while binding, indices
199                          * must coincide.
200                          */
201                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
202                                 return -EINVAL;
203
204                         sk->sk_bound_dev_if = usin->sin6_scope_id;
205                 }
206
207                 /* Connect to link-local address requires an interface */
208                 if (!sk->sk_bound_dev_if)
209                         return -EINVAL;
210         }
211
212         if (tp->rx_opt.ts_recent_stamp &&
213             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
214                 tp->rx_opt.ts_recent = 0;
215                 tp->rx_opt.ts_recent_stamp = 0;
216                 WRITE_ONCE(tp->write_seq, 0);
217         }
218
219         sk->sk_v6_daddr = usin->sin6_addr;
220         np->flow_label = fl6.flowlabel;
221
222         /*
223          *      TCP over IPv4
224          */
225
226         if (addr_type & IPV6_ADDR_MAPPED) {
227                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
228                 struct sockaddr_in sin;
229
230                 if (ipv6_only_sock(sk))
231                         return -ENETUNREACH;
232
233                 sin.sin_family = AF_INET;
234                 sin.sin_port = usin->sin6_port;
235                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
236
237                 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
238                 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
239                 if (sk_is_mptcp(sk))
240                         mptcpv6_handle_mapped(sk, true);
241                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
242 #ifdef CONFIG_TCP_MD5SIG
243                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
244 #endif
245
246                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
247
248                 if (err) {
249                         icsk->icsk_ext_hdr_len = exthdrlen;
250                         /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
251                         WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
252                         if (sk_is_mptcp(sk))
253                                 mptcpv6_handle_mapped(sk, false);
254                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256                         tp->af_specific = &tcp_sock_ipv6_specific;
257 #endif
258                         goto failure;
259                 }
260                 np->saddr = sk->sk_v6_rcv_saddr;
261
262                 return err;
263         }
264
265         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266                 saddr = &sk->sk_v6_rcv_saddr;
267
268         fl6.flowi6_proto = IPPROTO_TCP;
269         fl6.daddr = sk->sk_v6_daddr;
270         fl6.saddr = saddr ? *saddr : np->saddr;
271         fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
290
291         if (!saddr) {
292                 saddr = &fl6.saddr;
293
294                 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
295                 if (err)
296                         goto failure;
297         }
298
299         /* set the source address */
300         np->saddr = *saddr;
301         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
302
303         sk->sk_gso_type = SKB_GSO_TCPV6;
304         ip6_dst_store(sk, dst, NULL, NULL);
305
306         icsk->icsk_ext_hdr_len = 0;
307         if (opt)
308                 icsk->icsk_ext_hdr_len = opt->opt_flen +
309                                          opt->opt_nflen;
310
311         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
312
313         inet->inet_dport = usin->sin6_port;
314
315         tcp_set_state(sk, TCP_SYN_SENT);
316         err = inet6_hash_connect(tcp_death_row, sk);
317         if (err)
318                 goto late_failure;
319
320         sk_set_txhash(sk);
321
322         if (likely(!tp->repair)) {
323                 if (!tp->write_seq)
324                         WRITE_ONCE(tp->write_seq,
325                                    secure_tcpv6_seq(np->saddr.s6_addr32,
326                                                     sk->sk_v6_daddr.s6_addr32,
327                                                     inet->inet_sport,
328                                                     inet->inet_dport));
329                 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
330                                                    sk->sk_v6_daddr.s6_addr32);
331         }
332
333         if (tcp_fastopen_defer_connect(sk, &err))
334                 return err;
335         if (err)
336                 goto late_failure;
337
338         err = tcp_connect(sk);
339         if (err)
340                 goto late_failure;
341
342         return 0;
343
344 late_failure:
345         tcp_set_state(sk, TCP_CLOSE);
346         inet_bhash2_reset_saddr(sk);
347 failure:
348         inet->inet_dport = 0;
349         sk->sk_route_caps = 0;
350         return err;
351 }
352
353 static void tcp_v6_mtu_reduced(struct sock *sk)
354 {
355         struct dst_entry *dst;
356         u32 mtu;
357
358         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359                 return;
360
361         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
362
363         /* Drop requests trying to increase our current mss.
364          * Check done in __ip6_rt_update_pmtu() is too late.
365          */
366         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
367                 return;
368
369         dst = inet6_csk_update_pmtu(sk, mtu);
370         if (!dst)
371                 return;
372
373         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
374                 tcp_sync_mss(sk, dst_mtu(dst));
375                 tcp_simple_retransmit(sk);
376         }
377 }
378
379 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
380                 u8 type, u8 code, int offset, __be32 info)
381 {
382         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
383         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
384         struct net *net = dev_net(skb->dev);
385         struct request_sock *fastopen;
386         struct ipv6_pinfo *np;
387         struct tcp_sock *tp;
388         __u32 seq, snd_una;
389         struct sock *sk;
390         bool fatal;
391         int err;
392
393         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
394                                         &hdr->daddr, th->dest,
395                                         &hdr->saddr, ntohs(th->source),
396                                         skb->dev->ifindex, inet6_sdif(skb));
397
398         if (!sk) {
399                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
400                                   ICMP6_MIB_INERRORS);
401                 return -ENOENT;
402         }
403
404         if (sk->sk_state == TCP_TIME_WAIT) {
405                 inet_twsk_put(inet_twsk(sk));
406                 return 0;
407         }
408         seq = ntohl(th->seq);
409         fatal = icmpv6_err_convert(type, code, &err);
410         if (sk->sk_state == TCP_NEW_SYN_RECV) {
411                 tcp_req_err(sk, seq, fatal);
412                 return 0;
413         }
414
415         bh_lock_sock(sk);
416         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
417                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
418
419         if (sk->sk_state == TCP_CLOSE)
420                 goto out;
421
422         if (static_branch_unlikely(&ip6_min_hopcount)) {
423                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
424                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
425                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
426                         goto out;
427                 }
428         }
429
430         tp = tcp_sk(sk);
431         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
432         fastopen = rcu_dereference(tp->fastopen_rsk);
433         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
434         if (sk->sk_state != TCP_LISTEN &&
435             !between(seq, snd_una, tp->snd_nxt)) {
436                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
437                 goto out;
438         }
439
440         np = tcp_inet6_sk(sk);
441
442         if (type == NDISC_REDIRECT) {
443                 if (!sock_owned_by_user(sk)) {
444                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
445
446                         if (dst)
447                                 dst->ops->redirect(dst, sk, skb);
448                 }
449                 goto out;
450         }
451
452         if (type == ICMPV6_PKT_TOOBIG) {
453                 u32 mtu = ntohl(info);
454
455                 /* We are not interested in TCP_LISTEN and open_requests
456                  * (SYN-ACKs send out by Linux are always <576bytes so
457                  * they should go through unfragmented).
458                  */
459                 if (sk->sk_state == TCP_LISTEN)
460                         goto out;
461
462                 if (!ip6_sk_accept_pmtu(sk))
463                         goto out;
464
465                 if (mtu < IPV6_MIN_MTU)
466                         goto out;
467
468                 WRITE_ONCE(tp->mtu_info, mtu);
469
470                 if (!sock_owned_by_user(sk))
471                         tcp_v6_mtu_reduced(sk);
472                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
473                                            &sk->sk_tsq_flags))
474                         sock_hold(sk);
475                 goto out;
476         }
477
478
479         /* Might be for an request_sock */
480         switch (sk->sk_state) {
481         case TCP_SYN_SENT:
482         case TCP_SYN_RECV:
483                 /* Only in fast or simultaneous open. If a fast open socket is
484                  * already accepted it is treated as a connected one below.
485                  */
486                 if (fastopen && !fastopen->sk)
487                         break;
488
489                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
490
491                 if (!sock_owned_by_user(sk)) {
492                         WRITE_ONCE(sk->sk_err, err);
493                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
494
495                         tcp_done(sk);
496                 } else {
497                         WRITE_ONCE(sk->sk_err_soft, err);
498                 }
499                 goto out;
500         case TCP_LISTEN:
501                 break;
502         default:
503                 /* check if this ICMP message allows revert of backoff.
504                  * (see RFC 6069)
505                  */
506                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
507                     code == ICMPV6_NOROUTE)
508                         tcp_ld_RTO_revert(sk, seq);
509         }
510
511         if (!sock_owned_by_user(sk) && np->recverr) {
512                 WRITE_ONCE(sk->sk_err, err);
513                 sk_error_report(sk);
514         } else {
515                 WRITE_ONCE(sk->sk_err_soft, err);
516         }
517 out:
518         bh_unlock_sock(sk);
519         sock_put(sk);
520         return 0;
521 }
522
523
524 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
525                               struct flowi *fl,
526                               struct request_sock *req,
527                               struct tcp_fastopen_cookie *foc,
528                               enum tcp_synack_type synack_type,
529                               struct sk_buff *syn_skb)
530 {
531         struct inet_request_sock *ireq = inet_rsk(req);
532         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
533         struct ipv6_txoptions *opt;
534         struct flowi6 *fl6 = &fl->u.ip6;
535         struct sk_buff *skb;
536         int err = -ENOMEM;
537         u8 tclass;
538
539         /* First, grab a route. */
540         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
541                                                IPPROTO_TCP)) == NULL)
542                 goto done;
543
544         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
545
546         if (skb) {
547                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
548                                     &ireq->ir_v6_rmt_addr);
549
550                 fl6->daddr = ireq->ir_v6_rmt_addr;
551                 if (np->repflow && ireq->pktopts)
552                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
553
554                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
555                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
556                                 (np->tclass & INET_ECN_MASK) :
557                                 np->tclass;
558
559                 if (!INET_ECN_is_capable(tclass) &&
560                     tcp_bpf_ca_needs_ecn((struct sock *)req))
561                         tclass |= INET_ECN_ECT_0;
562
563                 rcu_read_lock();
564                 opt = ireq->ipv6_opt;
565                 if (!opt)
566                         opt = rcu_dereference(np->opt);
567                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
568                                tclass, sk->sk_priority);
569                 rcu_read_unlock();
570                 err = net_xmit_eval(err);
571         }
572
573 done:
574         return err;
575 }
576
577
578 static void tcp_v6_reqsk_destructor(struct request_sock *req)
579 {
580         kfree(inet_rsk(req)->ipv6_opt);
581         consume_skb(inet_rsk(req)->pktopts);
582 }
583
584 #ifdef CONFIG_TCP_MD5SIG
585 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
586                                                    const struct in6_addr *addr,
587                                                    int l3index)
588 {
589         return tcp_md5_do_lookup(sk, l3index,
590                                  (union tcp_md5_addr *)addr, AF_INET6);
591 }
592
593 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
594                                                 const struct sock *addr_sk)
595 {
596         int l3index;
597
598         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
599                                                  addr_sk->sk_bound_dev_if);
600         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
601                                     l3index);
602 }
603
604 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
605                                  sockptr_t optval, int optlen)
606 {
607         struct tcp_md5sig cmd;
608         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
609         int l3index = 0;
610         u8 prefixlen;
611         u8 flags;
612
613         if (optlen < sizeof(cmd))
614                 return -EINVAL;
615
616         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
617                 return -EFAULT;
618
619         if (sin6->sin6_family != AF_INET6)
620                 return -EINVAL;
621
622         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
623
624         if (optname == TCP_MD5SIG_EXT &&
625             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
626                 prefixlen = cmd.tcpm_prefixlen;
627                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
628                                         prefixlen > 32))
629                         return -EINVAL;
630         } else {
631                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
632         }
633
634         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
635             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
636                 struct net_device *dev;
637
638                 rcu_read_lock();
639                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
640                 if (dev && netif_is_l3_master(dev))
641                         l3index = dev->ifindex;
642                 rcu_read_unlock();
643
644                 /* ok to reference set/not set outside of rcu;
645                  * right now device MUST be an L3 master
646                  */
647                 if (!dev || !l3index)
648                         return -EINVAL;
649         }
650
651         if (!cmd.tcpm_keylen) {
652                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
653                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
654                                               AF_INET, prefixlen,
655                                               l3index, flags);
656                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
657                                       AF_INET6, prefixlen, l3index, flags);
658         }
659
660         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
661                 return -EINVAL;
662
663         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
664                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
665                                       AF_INET, prefixlen, l3index, flags,
666                                       cmd.tcpm_key, cmd.tcpm_keylen);
667
668         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
669                               AF_INET6, prefixlen, l3index, flags,
670                               cmd.tcpm_key, cmd.tcpm_keylen);
671 }
672
673 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
674                                    const struct in6_addr *daddr,
675                                    const struct in6_addr *saddr,
676                                    const struct tcphdr *th, int nbytes)
677 {
678         struct tcp6_pseudohdr *bp;
679         struct scatterlist sg;
680         struct tcphdr *_th;
681
682         bp = hp->scratch;
683         /* 1. TCP pseudo-header (RFC2460) */
684         bp->saddr = *saddr;
685         bp->daddr = *daddr;
686         bp->protocol = cpu_to_be32(IPPROTO_TCP);
687         bp->len = cpu_to_be32(nbytes);
688
689         _th = (struct tcphdr *)(bp + 1);
690         memcpy(_th, th, sizeof(*th));
691         _th->check = 0;
692
693         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
694         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
695                                 sizeof(*bp) + sizeof(*th));
696         return crypto_ahash_update(hp->md5_req);
697 }
698
699 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
700                                const struct in6_addr *daddr, struct in6_addr *saddr,
701                                const struct tcphdr *th)
702 {
703         struct tcp_md5sig_pool *hp;
704         struct ahash_request *req;
705
706         hp = tcp_get_md5sig_pool();
707         if (!hp)
708                 goto clear_hash_noput;
709         req = hp->md5_req;
710
711         if (crypto_ahash_init(req))
712                 goto clear_hash;
713         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
714                 goto clear_hash;
715         if (tcp_md5_hash_key(hp, key))
716                 goto clear_hash;
717         ahash_request_set_crypt(req, NULL, md5_hash, 0);
718         if (crypto_ahash_final(req))
719                 goto clear_hash;
720
721         tcp_put_md5sig_pool();
722         return 0;
723
724 clear_hash:
725         tcp_put_md5sig_pool();
726 clear_hash_noput:
727         memset(md5_hash, 0, 16);
728         return 1;
729 }
730
731 static int tcp_v6_md5_hash_skb(char *md5_hash,
732                                const struct tcp_md5sig_key *key,
733                                const struct sock *sk,
734                                const struct sk_buff *skb)
735 {
736         const struct in6_addr *saddr, *daddr;
737         struct tcp_md5sig_pool *hp;
738         struct ahash_request *req;
739         const struct tcphdr *th = tcp_hdr(skb);
740
741         if (sk) { /* valid for establish/request sockets */
742                 saddr = &sk->sk_v6_rcv_saddr;
743                 daddr = &sk->sk_v6_daddr;
744         } else {
745                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
746                 saddr = &ip6h->saddr;
747                 daddr = &ip6h->daddr;
748         }
749
750         hp = tcp_get_md5sig_pool();
751         if (!hp)
752                 goto clear_hash_noput;
753         req = hp->md5_req;
754
755         if (crypto_ahash_init(req))
756                 goto clear_hash;
757
758         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
759                 goto clear_hash;
760         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
761                 goto clear_hash;
762         if (tcp_md5_hash_key(hp, key))
763                 goto clear_hash;
764         ahash_request_set_crypt(req, NULL, md5_hash, 0);
765         if (crypto_ahash_final(req))
766                 goto clear_hash;
767
768         tcp_put_md5sig_pool();
769         return 0;
770
771 clear_hash:
772         tcp_put_md5sig_pool();
773 clear_hash_noput:
774         memset(md5_hash, 0, 16);
775         return 1;
776 }
777
778 #endif
779
780 static void tcp_v6_init_req(struct request_sock *req,
781                             const struct sock *sk_listener,
782                             struct sk_buff *skb)
783 {
784         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
785         struct inet_request_sock *ireq = inet_rsk(req);
786         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
787
788         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
789         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
790
791         /* So that link locals have meaning */
792         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
793             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
794                 ireq->ir_iif = tcp_v6_iif(skb);
795
796         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
797             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
798              np->rxopt.bits.rxinfo ||
799              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
800              np->rxopt.bits.rxohlim || np->repflow)) {
801                 refcount_inc(&skb->users);
802                 ireq->pktopts = skb;
803         }
804 }
805
806 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
807                                           struct sk_buff *skb,
808                                           struct flowi *fl,
809                                           struct request_sock *req)
810 {
811         tcp_v6_init_req(req, sk, skb);
812
813         if (security_inet_conn_request(sk, skb, req))
814                 return NULL;
815
816         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
817 }
818
819 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
820         .family         =       AF_INET6,
821         .obj_size       =       sizeof(struct tcp6_request_sock),
822         .rtx_syn_ack    =       tcp_rtx_synack,
823         .send_ack       =       tcp_v6_reqsk_send_ack,
824         .destructor     =       tcp_v6_reqsk_destructor,
825         .send_reset     =       tcp_v6_send_reset,
826         .syn_ack_timeout =      tcp_syn_ack_timeout,
827 };
828
829 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
830         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
831                                 sizeof(struct ipv6hdr),
832 #ifdef CONFIG_TCP_MD5SIG
833         .req_md5_lookup =       tcp_v6_md5_lookup,
834         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
835 #endif
836 #ifdef CONFIG_SYN_COOKIES
837         .cookie_init_seq =      cookie_v6_init_sequence,
838 #endif
839         .route_req      =       tcp_v6_route_req,
840         .init_seq       =       tcp_v6_init_seq,
841         .init_ts_off    =       tcp_v6_init_ts_off,
842         .send_synack    =       tcp_v6_send_synack,
843 };
844
845 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
846                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
847                                  int oif, struct tcp_md5sig_key *key, int rst,
848                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
849 {
850         const struct tcphdr *th = tcp_hdr(skb);
851         struct tcphdr *t1;
852         struct sk_buff *buff;
853         struct flowi6 fl6;
854         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
855         struct sock *ctl_sk = net->ipv6.tcp_sk;
856         unsigned int tot_len = sizeof(struct tcphdr);
857         __be32 mrst = 0, *topt;
858         struct dst_entry *dst;
859         __u32 mark = 0;
860
861         if (tsecr)
862                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
863 #ifdef CONFIG_TCP_MD5SIG
864         if (key)
865                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
866 #endif
867
868 #ifdef CONFIG_MPTCP
869         if (rst && !key) {
870                 mrst = mptcp_reset_option(skb);
871
872                 if (mrst)
873                         tot_len += sizeof(__be32);
874         }
875 #endif
876
877         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
878         if (!buff)
879                 return;
880
881         skb_reserve(buff, MAX_TCP_HEADER);
882
883         t1 = skb_push(buff, tot_len);
884         skb_reset_transport_header(buff);
885
886         /* Swap the send and the receive. */
887         memset(t1, 0, sizeof(*t1));
888         t1->dest = th->source;
889         t1->source = th->dest;
890         t1->doff = tot_len / 4;
891         t1->seq = htonl(seq);
892         t1->ack_seq = htonl(ack);
893         t1->ack = !rst || !th->ack;
894         t1->rst = rst;
895         t1->window = htons(win);
896
897         topt = (__be32 *)(t1 + 1);
898
899         if (tsecr) {
900                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
901                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
902                 *topt++ = htonl(tsval);
903                 *topt++ = htonl(tsecr);
904         }
905
906         if (mrst)
907                 *topt++ = mrst;
908
909 #ifdef CONFIG_TCP_MD5SIG
910         if (key) {
911                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
912                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
913                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
914                                     &ipv6_hdr(skb)->saddr,
915                                     &ipv6_hdr(skb)->daddr, t1);
916         }
917 #endif
918
919         memset(&fl6, 0, sizeof(fl6));
920         fl6.daddr = ipv6_hdr(skb)->saddr;
921         fl6.saddr = ipv6_hdr(skb)->daddr;
922         fl6.flowlabel = label;
923
924         buff->ip_summed = CHECKSUM_PARTIAL;
925
926         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
927
928         fl6.flowi6_proto = IPPROTO_TCP;
929         if (rt6_need_strict(&fl6.daddr) && !oif)
930                 fl6.flowi6_oif = tcp_v6_iif(skb);
931         else {
932                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
933                         oif = skb->skb_iif;
934
935                 fl6.flowi6_oif = oif;
936         }
937
938         if (sk) {
939                 if (sk->sk_state == TCP_TIME_WAIT)
940                         mark = inet_twsk(sk)->tw_mark;
941                 else
942                         mark = sk->sk_mark;
943                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
944         }
945         if (txhash) {
946                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
947                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
948         }
949         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
950         fl6.fl6_dport = t1->dest;
951         fl6.fl6_sport = t1->source;
952         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
953         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
954
955         /* Pass a socket to ip6_dst_lookup either it is for RST
956          * Underlying function will use this to retrieve the network
957          * namespace
958          */
959         if (sk && sk->sk_state != TCP_TIME_WAIT)
960                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
961         else
962                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
963         if (!IS_ERR(dst)) {
964                 skb_dst_set(buff, dst);
965                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
966                          tclass & ~INET_ECN_MASK, priority);
967                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
968                 if (rst)
969                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
970                 return;
971         }
972
973         kfree_skb(buff);
974 }
975
976 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
977 {
978         const struct tcphdr *th = tcp_hdr(skb);
979         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
980         u32 seq = 0, ack_seq = 0;
981         struct tcp_md5sig_key *key = NULL;
982 #ifdef CONFIG_TCP_MD5SIG
983         const __u8 *hash_location = NULL;
984         unsigned char newhash[16];
985         int genhash;
986         struct sock *sk1 = NULL;
987 #endif
988         __be32 label = 0;
989         u32 priority = 0;
990         struct net *net;
991         u32 txhash = 0;
992         int oif = 0;
993
994         if (th->rst)
995                 return;
996
997         /* If sk not NULL, it means we did a successful lookup and incoming
998          * route had to be correct. prequeue might have dropped our dst.
999          */
1000         if (!sk && !ipv6_unicast_destination(skb))
1001                 return;
1002
1003         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1004 #ifdef CONFIG_TCP_MD5SIG
1005         rcu_read_lock();
1006         hash_location = tcp_parse_md5sig_option(th);
1007         if (sk && sk_fullsock(sk)) {
1008                 int l3index;
1009
1010                 /* sdif set, means packet ingressed via a device
1011                  * in an L3 domain and inet_iif is set to it.
1012                  */
1013                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1014                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1015         } else if (hash_location) {
1016                 int dif = tcp_v6_iif_l3_slave(skb);
1017                 int sdif = tcp_v6_sdif(skb);
1018                 int l3index;
1019
1020                 /*
1021                  * active side is lost. Try to find listening socket through
1022                  * source port, and then find md5 key through listening socket.
1023                  * we are not loose security here:
1024                  * Incoming packet is checked with md5 hash with finding key,
1025                  * no RST generated if md5 hash doesn't match.
1026                  */
1027                 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1028                                             NULL, 0, &ipv6h->saddr, th->source,
1029                                             &ipv6h->daddr, ntohs(th->source),
1030                                             dif, sdif);
1031                 if (!sk1)
1032                         goto out;
1033
1034                 /* sdif set, means packet ingressed via a device
1035                  * in an L3 domain and dif is set to it.
1036                  */
1037                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1038
1039                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1040                 if (!key)
1041                         goto out;
1042
1043                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1044                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1045                         goto out;
1046         }
1047 #endif
1048
1049         if (th->ack)
1050                 seq = ntohl(th->ack_seq);
1051         else
1052                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1053                           (th->doff << 2);
1054
1055         if (sk) {
1056                 oif = sk->sk_bound_dev_if;
1057                 if (sk_fullsock(sk)) {
1058                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1059
1060                         trace_tcp_send_reset(sk, skb);
1061                         if (np->repflow)
1062                                 label = ip6_flowlabel(ipv6h);
1063                         priority = sk->sk_priority;
1064                         txhash = sk->sk_txhash;
1065                 }
1066                 if (sk->sk_state == TCP_TIME_WAIT) {
1067                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1068                         priority = inet_twsk(sk)->tw_priority;
1069                         txhash = inet_twsk(sk)->tw_txhash;
1070                 }
1071         } else {
1072                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1073                         label = ip6_flowlabel(ipv6h);
1074         }
1075
1076         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1077                              ipv6_get_dsfield(ipv6h), label, priority, txhash);
1078
1079 #ifdef CONFIG_TCP_MD5SIG
1080 out:
1081         rcu_read_unlock();
1082 #endif
1083 }
1084
1085 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1086                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1087                             struct tcp_md5sig_key *key, u8 tclass,
1088                             __be32 label, u32 priority, u32 txhash)
1089 {
1090         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1091                              tclass, label, priority, txhash);
1092 }
1093
1094 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1095 {
1096         struct inet_timewait_sock *tw = inet_twsk(sk);
1097         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1098
1099         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1100                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1101                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1102                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1103                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1104                         tw->tw_txhash);
1105
1106         inet_twsk_put(tw);
1107 }
1108
1109 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1110                                   struct request_sock *req)
1111 {
1112         int l3index;
1113
1114         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1115
1116         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1117          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1118          */
1119         /* RFC 7323 2.3
1120          * The window field (SEG.WND) of every outgoing segment, with the
1121          * exception of <SYN> segments, MUST be right-shifted by
1122          * Rcv.Wind.Shift bits:
1123          */
1124         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1125                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1126                         tcp_rsk(req)->rcv_nxt,
1127                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1128                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1129                         req->ts_recent, sk->sk_bound_dev_if,
1130                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1131                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
1132                         tcp_rsk(req)->txhash);
1133 }
1134
1135
/*
 * With CONFIG_SYN_COOKIES, hand every non-SYN segment to cookie_v6_check()
 * and return whatever it returns.  In all other cases @sk is returned
 * unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        if (!tcp_hdr(skb)->syn)
                return cookie_v6_check(sk, skb);
#endif
        return sk;
}
1146
1147 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1148                          struct tcphdr *th, u32 *cookie)
1149 {
1150         u16 mss = 0;
1151 #ifdef CONFIG_SYN_COOKIES
1152         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1153                                     &tcp_request_sock_ipv6_ops, sk, th);
1154         if (mss) {
1155                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1156                 tcp_synq_overflow(sk);
1157         }
1158 #endif
1159         return mss;
1160 }
1161
1162 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1163 {
1164         if (skb->protocol == htons(ETH_P_IP))
1165                 return tcp_v4_conn_request(sk, skb);
1166
1167         if (!ipv6_unicast_destination(skb))
1168                 goto drop;
1169
1170         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1171                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1172                 return 0;
1173         }
1174
1175         return tcp_conn_request(&tcp6_request_sock_ops,
1176                                 &tcp_request_sock_ipv6_ops, sk, skb);
1177
1178 drop:
1179         tcp_listendrop(sk);
1180         return 0; /* don't send reset */
1181 }
1182
1183 static void tcp_v6_restore_cb(struct sk_buff *skb)
1184 {
1185         /* We need to move header back to the beginning if xfrm6_policy_check()
1186          * and tcp_v6_fill_cb() are going to be called again.
1187          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1188          */
1189         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1190                 sizeof(struct inet6_skb_parm));
1191 }
1192
/*
 * Create the child socket for connection request @req on listener @sk.
 *
 * @sk:         listening socket.
 * @skb:        segment that triggers creation of the child.
 * @req:        request sock describing the connection.
 * @dst:        route for the child; when NULL it is looked up here with
 *              inet6_csk_route_req().
 * @req_unhash: handed to inet_ehash_nolisten(); per the comment near the
 *              end of this function, NULL is expected only in the
 *              syncookie case.
 * @own_req:    output, set from inet_ehash_nolisten() on the native IPv6
 *              path (passed through to tcp_v4_syn_recv_sock() on the
 *              v4-mapped path).
 *
 * IPv4 packets (v4-mapped case) are delegated to tcp_v4_syn_recv_sock();
 * the resulting socket is then switched to the mapped af_ops.
 *
 * Returns the new socket, or NULL on failure.  On the native IPv6 failure
 * paths tcp_listendrop() is called on @sk.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped: let the IPv4 code build the child, then
                 *      fix up its IPv6 state below.
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                /* Start from a copy of the listener's IPv6 state ... */
                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                /* ... then clear the pointers the memcpy() duplicated from
                 * the listener and take per-packet values from the IPv4
                 * header.
                 */
                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 TCP
                   code worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        /* From here on the route is stored in newsk. */
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        /* Start from a copy of the listener's IPv6 state. */
        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        /* Addresses and device binding come from the request. */
        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever,
           but we do one more thing here: reattach optmem
           to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        /* opt is re-tested because ipv6_dup_options() result replaced it. */
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        /* Placeholder IPv4 addresses for a native IPv6 socket. */
        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                const union tcp_md5_addr *addr;

                addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
                if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
                        /* Key copy failed: tear the child down again. */
                        inet_csk_prepare_forced_close(newsk);
                        tcp_done(newsk);
                        goto out;
                }
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions)
                                tcp_v6_restore_cb(newnp->pktoptions);
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should be executed in the
                         * syncookie case only
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

        /* Error exits: each label falls through to the ones below it. */
out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}
1415
1416 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1417                                                            u32));
1418 /* The socket must have it's spinlock held when we get
1419  * here, unless it is a TCP_LISTEN socket.
1420  *
1421  * We have a potential double-lock case here, so even when
1422  * doing backlog processing we use the BH locking scheme.
1423  * This is because we cannot sleep with the original spinlock
1424  * held.
1425  */
1426 INDIRECT_CALLABLE_SCOPE
1427 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1428 {
1429         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1430         struct sk_buff *opt_skb = NULL;
1431         enum skb_drop_reason reason;
1432         struct tcp_sock *tp;
1433
1434         /* Imagine: socket is IPv6. IPv4 packet arrives,
1435            goes to IPv4 receive handler and backlogged.
1436            From backlog it always goes here. Kerboom...
1437            Fortunately, tcp_rcv_established and rcv_established
1438            handle them correctly, but it is not case with
1439            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1440          */
1441
1442         if (skb->protocol == htons(ETH_P_IP))
1443                 return tcp_v4_do_rcv(sk, skb);
1444
1445         /*
1446          *      socket locking is here for SMP purposes as backlog rcv
1447          *      is currently called with bh processing disabled.
1448          */
1449
1450         /* Do Stevens' IPV6_PKTOPTIONS.
1451
1452            Yes, guys, it is the only place in our code, where we
1453            may make it not affecting IPv4.
1454            The rest of code is protocol independent,
1455            and I do not like idea to uglify IPv4.
1456
1457            Actually, all the idea behind IPV6_PKTOPTIONS
1458            looks not very well thought. For now we latch
1459            options, received in the last packet, enqueued
1460            by tcp. Feel free to propose better solution.
1461                                                --ANK (980728)
1462          */
1463         if (np->rxopt.all)
1464                 opt_skb = skb_clone_and_charge_r(skb, sk);
1465
1466         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1467         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1468                 struct dst_entry *dst;
1469
1470                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1471                                                 lockdep_sock_is_held(sk));
1472
1473                 sock_rps_save_rxhash(sk, skb);
1474                 sk_mark_napi_id(sk, skb);
1475                 if (dst) {
1476                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1477                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1478                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1479                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1480                                 dst_release(dst);
1481                         }
1482                 }
1483
1484                 tcp_rcv_established(sk, skb);
1485                 if (opt_skb)
1486                         goto ipv6_pktoptions;
1487                 return 0;
1488         }
1489
1490         if (tcp_checksum_complete(skb))
1491                 goto csum_err;
1492
1493         if (sk->sk_state == TCP_LISTEN) {
1494                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1495
1496                 if (!nsk)
1497                         goto discard;
1498
1499                 if (nsk != sk) {
1500                         if (tcp_child_process(sk, nsk, skb))
1501                                 goto reset;
1502                         if (opt_skb)
1503                                 __kfree_skb(opt_skb);
1504                         return 0;
1505                 }
1506         } else
1507                 sock_rps_save_rxhash(sk, skb);
1508
1509         if (tcp_rcv_state_process(sk, skb))
1510                 goto reset;
1511         if (opt_skb)
1512                 goto ipv6_pktoptions;
1513         return 0;
1514
1515 reset:
1516         tcp_v6_send_reset(sk, skb);
1517 discard:
1518         if (opt_skb)
1519                 __kfree_skb(opt_skb);
1520         kfree_skb_reason(skb, reason);
1521         return 0;
1522 csum_err:
1523         reason = SKB_DROP_REASON_TCP_CSUM;
1524         trace_tcp_bad_csum(skb);
1525         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1526         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1527         goto discard;
1528
1529
1530 ipv6_pktoptions:
1531         /* Do you ask, what is it?
1532
1533            1. skb was enqueued by tcp.
1534            2. skb is added to tail of read queue, rather than out of order.
1535            3. socket is not in passive state.
1536            4. Finally, it really contains options, which user wants to receive.
1537          */
1538         tp = tcp_sk(sk);
1539         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1540             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1541                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1542                         np->mcast_oif = tcp_v6_iif(opt_skb);
1543                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1544                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1545                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1546                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1547                 if (np->repflow)
1548                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1549                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1550                         tcp_v6_restore_cb(opt_skb);
1551                         opt_skb = xchg(&np->pktoptions, opt_skb);
1552                 } else {
1553                         __kfree_skb(opt_skb);
1554                         opt_skb = xchg(&np->pktoptions, NULL);
1555                 }
1556         }
1557
1558         consume_skb(opt_skb);
1559         return 0;
1560 }
1561
1562 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1563                            const struct tcphdr *th)
1564 {
1565         /* This is tricky: we move IP6CB at its correct location into
1566          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1567          * _decode_session6() uses IP6CB().
1568          * barrier() makes sure compiler won't play aliasing games.
1569          */
1570         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1571                 sizeof(struct inet6_skb_parm));
1572         barrier();
1573
1574         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1575         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1576                                     skb->len - th->doff*4);
1577         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1578         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1579         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1580         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1581         TCP_SKB_CB(skb)->sacked = 0;
1582         TCP_SKB_CB(skb)->has_rxtstamp =
1583                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1584 }
1585
/*
 * tcp_v6_rcv - receive handler for TCP segments arriving over IPv6
 * (installed as the .handler of tcpv6_protocol).
 *
 * Checks the TCP header length and checksum, looks up the owning socket
 * and dispatches on its state:
 *   - TCP_TIME_WAIT sockets are handled under the do_time_wait label;
 *   - TCP_NEW_SYN_RECV request socks are resolved against their listener
 *     through tcp_check_req();
 *   - every other socket gets the segment through tcp_v6_do_rcv(), either
 *     directly or later via the backlog when the socket is owned by user
 *     context.
 *
 * Paths that do not pass the skb on end at discard_it, which frees it with
 * kfree_skb_reason() using the drop reason collected along the way.
 *
 * Returns 0, or -1 when tcp_v6_do_rcv() returned a non-zero value.
 */
1586 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1587 {
1588         enum skb_drop_reason drop_reason;
1589         int sdif = inet6_sdif(skb);
1590         int dif = inet6_iif(skb);
1591         const struct tcphdr *th;
1592         const struct ipv6hdr *hdr;
1593         bool refcounted;
1594         struct sock *sk;
1595         int ret;
1596         struct net *net = dev_net(skb->dev);
1597
1598         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
             /* Anything not marked PACKET_HOST is dropped before being counted. */
1599         if (skb->pkt_type != PACKET_HOST)
1600                 goto discard_it;
1601
1602         /*
1603          *      Count it even if it's bad.
1604          */
1605         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1606
             /* A basic TCP header must be available before th is dereferenced. */
1607         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1608                 goto discard_it;
1609
1610         th = (const struct tcphdr *)skb->data;
1611
             /* doff is in 32-bit words; less than the fixed header is malformed. */
1612         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1613                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1614                 goto bad_packet;
1615         }
             /* Now require the whole header, options included. */
1616         if (!pskb_may_pull(skb, th->doff*4))
1617                 goto discard_it;
1618
1619         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1620                 goto csum_error;
1621
             /*
              * Header pointers are (re)loaded here.
              * NOTE(review): presumably because the pulls above may have moved
              * skb->data - confirm.
              */
1622         th = (const struct tcphdr *)skb->data;
1623         hdr = ipv6_hdr(skb);
1624
             /*
              * Socket lookup. 'refcounted' is filled in by the lookup and decides
              * whether the exit paths call sock_put() on sk.
              */
1625 lookup:
1626         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1627                                 th->source, th->dest, inet6_iif(skb), sdif,
1628                                 &refcounted);
1629         if (!sk)
1630                 goto no_tcp_socket;
1631
1632 process:
1633         if (sk->sk_state == TCP_TIME_WAIT)
1634                 goto do_time_wait;
1635
1636         if (sk->sk_state == TCP_NEW_SYN_RECV) {
                     /* sk is a request sock here; continue with its listener. */
1637                 struct request_sock *req = inet_reqsk(sk);
1638                 bool req_stolen = false;
1639                 struct sock *nsk;
1640
1641                 sk = req->rsk_listener;
1642                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1643                                                    &hdr->saddr, &hdr->daddr,
1644                                                    AF_INET6, dif, sdif);
1645                 if (drop_reason) {
1646                         sk_drops_add(sk, skb);
1647                         reqsk_put(req);
1648                         goto discard_it;
1649                 }
1650                 if (tcp_checksum_complete(skb)) {
1651                         reqsk_put(req);
1652                         goto csum_error;
1653                 }
                     /*
                      * The listener left TCP_LISTEN: try to move the request to
                      * another socket via reuseport_migrate_sock(); if that fails
                      * the request is dropped and the lookup is redone.
                      */
1654                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1655                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1656                         if (!nsk) {
1657                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1658                                 goto lookup;
1659                         }
1660                         sk = nsk;
1661                         /* reuseport_migrate_sock() has already held one sk_refcnt
1662                          * before returning.
1663                          */
1664                 } else {
1665                         sock_hold(sk);
1666                 }
                     /* Either branch above leaves us holding a reference on sk. */
1667                 refcounted = true;
1668                 nsk = NULL;
1669                 if (!tcp_filter(sk, skb)) {
                             /* Header pointers are reloaded after tcp_filter(). */
1670                         th = (const struct tcphdr *)skb->data;
1671                         hdr = ipv6_hdr(skb);
1672                         tcp_v6_fill_cb(skb, hdr, th);
1673                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1674                 } else {
1675                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1676                 }
1677                 if (!nsk) {
1678                         reqsk_put(req);
1679                         if (req_stolen) {
1680                                 /* Another cpu got exclusive access to req
1681                                  * and created a full blown socket.
1682                                  * Try to feed this packet to this socket
1683                                  * instead of discarding it.
1684                                  */
1685                                 tcp_v6_restore_cb(skb);
1686                                 sock_put(sk);
1687                                 goto lookup;
1688                         }
1689                         goto discard_and_relse;
1690                 }
                     /*
                      * nsk == sk: tcp_check_req() handed back the listener itself, so
                      * release the request, undo tcp_v6_fill_cb() and fall through to
                      * the regular processing below.
                      * Otherwise nsk is a different socket and the segment is given to
                      * tcp_child_process(); a non-zero result sends a reset.
                      */
1691                 if (nsk == sk) {
1692                         reqsk_put(req);
1693                         tcp_v6_restore_cb(skb);
1694                 } else if (tcp_child_process(sk, nsk, skb)) {
1695                         tcp_v6_send_reset(nsk, skb);
1696                         goto discard_and_relse;
1697                 } else {
1698                         sock_put(sk);
1699                         return 0;
1700                 }
1701         }
1702
1703         if (static_branch_unlikely(&ip6_min_hopcount)) {
1704                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1705                 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1706                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1707                         drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1708                         goto discard_and_relse;
1709                 }
1710         }
1711
1712         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1713                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1714                 goto discard_and_relse;
1715         }
1716
1717         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1718                                            AF_INET6, dif, sdif);
1719         if (drop_reason)
1720                 goto discard_and_relse;
1721
1722         nf_reset_ct(skb);
1723
1724         if (tcp_filter(sk, skb)) {
1725                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1726                 goto discard_and_relse;
1727         }
             /* Reload the header pointers after tcp_filter(), then fill the TCP cb. */
1728         th = (const struct tcphdr *)skb->data;
1729         hdr = ipv6_hdr(skb);
1730         tcp_v6_fill_cb(skb, hdr, th);
1731
1732         skb->dev = NULL;
1733
             /* Listening sockets are processed without taking the socket lock. */
1734         if (sk->sk_state == TCP_LISTEN) {
1735                 ret = tcp_v6_do_rcv(sk, skb);
1736                 goto put_and_return;
1737         }
1738
1739         sk_incoming_cpu_update(sk);
1740
1741         bh_lock_sock_nested(sk);
1742         tcp_segs_in(tcp_sk(sk), skb);
1743         ret = 0;
             /*
              * Process the segment now unless user context owns the socket, in
              * which case it goes to the backlog.
              * NOTE(review): the failure branch leaves without bh_unlock_sock();
              * tcp_add_backlog() is presumably expected to have dropped the lock
              * in that case - confirm.
              */
1744         if (!sock_owned_by_user(sk)) {
1745                 ret = tcp_v6_do_rcv(sk, skb);
1746         } else {
1747                 if (tcp_add_backlog(sk, skb, &drop_reason))
1748                         goto discard_and_relse;
1749         }
1750         bh_unlock_sock(sk);
1751 put_and_return:
1752         if (refcounted)
1753                 sock_put(sk);
1754         return ret ? -1 : 0;
1755
             /*
              * No socket matched. After the policy check, a segment with a good
              * checksum is answered with a reset; a bad one only bumps the error
              * counters. csum_error and bad_packet are jump targets placed inside
              * the if-body so earlier failures share those counter updates.
              */
1756 no_tcp_socket:
1757         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1758         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1759                 goto discard_it;
1760
1761         tcp_v6_fill_cb(skb, hdr, th);
1762
1763         if (tcp_checksum_complete(skb)) {
1764 csum_error:
1765                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1766                 trace_tcp_bad_csum(skb);
1767                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1768 bad_packet:
1769                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1770         } else {
1771                 tcp_v6_send_reset(NULL, skb);
1772         }
1773
             /* Common exit for dropped segments: free the skb with its reason. */
1774 discard_it:
1775         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1776         kfree_skb_reason(skb, drop_reason);
1777         return 0;
1778
             /* Drop path for segments that already have a socket attached. */
1779 discard_and_relse:
1780         sk_drops_add(sk, skb);
1781         if (refcounted)
1782                 sock_put(sk);
1783         goto discard_it;
1784
             /*
              * sk is a timewait socket here. Every exit from this section either
              * puts the timewait reference or leaves that to the helper it calls.
              */
1785 do_time_wait:
1786         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1787                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1788                 inet_twsk_put(inet_twsk(sk));
1789                 goto discard_it;
1790         }
1791
1792         tcp_v6_fill_cb(skb, hdr, th);
1793
1794         if (tcp_checksum_complete(skb)) {
1795                 inet_twsk_put(inet_twsk(sk));
1796                 goto csum_error;
1797         }
1798
1799         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1800         case TCP_TW_SYN:
1801         {
1802                 struct sock *sk2;
1803
                     /*
                      * Look for a listener that can take this SYN. If there is one,
                      * retire the timewait socket, undo tcp_v6_fill_cb() and restart
                      * at 'process' with the listener; refcounted is cleared so the
                      * exit paths will not sock_put() it.
                      */
1804                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1805                                             skb, __tcp_hdrlen(th),
1806                                             &ipv6_hdr(skb)->saddr, th->source,
1807                                             &ipv6_hdr(skb)->daddr,
1808                                             ntohs(th->dest),
1809                                             tcp_v6_iif_l3_slave(skb),
1810                                             sdif);
1811                 if (sk2) {
1812                         struct inet_timewait_sock *tw = inet_twsk(sk);
1813                         inet_twsk_deschedule_put(tw);
1814                         sk = sk2;
1815                         tcp_v6_restore_cb(skb);
1816                         refcounted = false;
1817                         goto process;
1818                 }
1819         }
1820                 /* to ACK */
1821                 fallthrough;
1822         case TCP_TW_ACK:
1823                 tcp_v6_timewait_ack(sk, skb);
1824                 break;
1825         case TCP_TW_RST:
1826                 tcp_v6_send_reset(sk, skb);
1827                 inet_twsk_deschedule_put(inet_twsk(sk));
1828                 goto discard_it;
1829         case TCP_TW_SUCCESS:
1830                 ;
1831         }
1832         goto discard_it;
1833 }
1834
1835 void tcp_v6_early_demux(struct sk_buff *skb)
1836 {
1837         struct net *net = dev_net(skb->dev);
1838         const struct ipv6hdr *hdr;
1839         const struct tcphdr *th;
1840         struct sock *sk;
1841
1842         if (skb->pkt_type != PACKET_HOST)
1843                 return;
1844
1845         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1846                 return;
1847
1848         hdr = ipv6_hdr(skb);
1849         th = tcp_hdr(skb);
1850
1851         if (th->doff < sizeof(struct tcphdr) / 4)
1852                 return;
1853
1854         /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1855         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1856                                         &hdr->saddr, th->source,
1857                                         &hdr->daddr, ntohs(th->dest),
1858                                         inet6_iif(skb), inet6_sdif(skb));
1859         if (sk) {
1860                 skb->sk = sk;
1861                 skb->destructor = sock_edemux;
1862                 if (sk_fullsock(sk)) {
1863                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1864
1865                         if (dst)
1866                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1867                         if (dst &&
1868                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1869                                 skb_dst_set_noref(skb, dst);
1870                 }
1871         }
1872 }
1873
1874 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1875         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1876         .twsk_unique    = tcp_twsk_unique,
1877         .twsk_destructor = tcp_twsk_destructor,
1878 };
1879
1880 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1881 {
1882         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1883 }
1884
1885 const struct inet_connection_sock_af_ops ipv6_specific = {
1886         .queue_xmit        = inet6_csk_xmit,
1887         .send_check        = tcp_v6_send_check,
1888         .rebuild_header    = inet6_sk_rebuild_header,
1889         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1890         .conn_request      = tcp_v6_conn_request,
1891         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1892         .net_header_len    = sizeof(struct ipv6hdr),
1893         .net_frag_header_len = sizeof(struct frag_hdr),
1894         .setsockopt        = ipv6_setsockopt,
1895         .getsockopt        = ipv6_getsockopt,
1896         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1897         .sockaddr_len      = sizeof(struct sockaddr_in6),
1898         .mtu_reduced       = tcp_v6_mtu_reduced,
1899 };
1900
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 hooks for native IPv6 sockets (only with CONFIG_TCP_MD5SIG). */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
};
#endif
1908
1909 /*
1910  *      TCP over IPv4 via INET6 API
1911  */
1912 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1913         .queue_xmit        = ip_queue_xmit,
1914         .send_check        = tcp_v4_send_check,
1915         .rebuild_header    = inet_sk_rebuild_header,
1916         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1917         .conn_request      = tcp_v6_conn_request,
1918         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1919         .net_header_len    = sizeof(struct iphdr),
1920         .setsockopt        = ipv6_setsockopt,
1921         .getsockopt        = ipv6_getsockopt,
1922         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1923         .sockaddr_len      = sizeof(struct sockaddr_in6),
1924         .mtu_reduced       = tcp_v4_mtu_reduced,
1925 };
1926
#ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-MD5 hooks for the IPv4-via-INET6 case: lookup and hashing use the
 * IPv4 helpers, key parsing uses the IPv6 one.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_parse	= tcp_v6_parse_md5_keys,
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1934
1935 /* NOTE: A lot of things set to zero explicitly by call to
1936  *       sk_alloc() so need not be done here.
1937  */
1938 static int tcp_v6_init_sock(struct sock *sk)
1939 {
1940         struct inet_connection_sock *icsk = inet_csk(sk);
1941
1942         tcp_init_sock(sk);
1943
1944         icsk->icsk_af_ops = &ipv6_specific;
1945
1946 #ifdef CONFIG_TCP_MD5SIG
1947         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1948 #endif
1949
1950         return 0;
1951 }
1952
1953 #ifdef CONFIG_PROC_FS
1954 /* Proc filesystem TCPv6 sock list dumping. */
1955 static void get_openreq6(struct seq_file *seq,
1956                          const struct request_sock *req, int i)
1957 {
1958         long ttd = req->rsk_timer.expires - jiffies;
1959         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1960         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1961
1962         if (ttd < 0)
1963                 ttd = 0;
1964
1965         seq_printf(seq,
1966                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1967                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1968                    i,
1969                    src->s6_addr32[0], src->s6_addr32[1],
1970                    src->s6_addr32[2], src->s6_addr32[3],
1971                    inet_rsk(req)->ir_num,
1972                    dest->s6_addr32[0], dest->s6_addr32[1],
1973                    dest->s6_addr32[2], dest->s6_addr32[3],
1974                    ntohs(inet_rsk(req)->ir_rmt_port),
1975                    TCP_SYN_RECV,
1976                    0, 0, /* could print option size, but that is af dependent. */
1977                    1,   /* timers active (only the expire timer) */
1978                    jiffies_to_clock_t(ttd),
1979                    req->num_timeout,
1980                    from_kuid_munged(seq_user_ns(seq),
1981                                     sock_i_uid(req->rsk_listener)),
1982                    0,  /* non standard timer */
1983                    0, /* open_requests have no inode */
1984                    0, req);
1985 }
1986
1987 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1988 {
1989         const struct in6_addr *dest, *src;
1990         __u16 destp, srcp;
1991         int timer_active;
1992         unsigned long timer_expires;
1993         const struct inet_sock *inet = inet_sk(sp);
1994         const struct tcp_sock *tp = tcp_sk(sp);
1995         const struct inet_connection_sock *icsk = inet_csk(sp);
1996         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1997         int rx_queue;
1998         int state;
1999
2000         dest  = &sp->sk_v6_daddr;
2001         src   = &sp->sk_v6_rcv_saddr;
2002         destp = ntohs(inet->inet_dport);
2003         srcp  = ntohs(inet->inet_sport);
2004
2005         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2006             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2007             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2008                 timer_active    = 1;
2009                 timer_expires   = icsk->icsk_timeout;
2010         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2011                 timer_active    = 4;
2012                 timer_expires   = icsk->icsk_timeout;
2013         } else if (timer_pending(&sp->sk_timer)) {
2014                 timer_active    = 2;
2015                 timer_expires   = sp->sk_timer.expires;
2016         } else {
2017                 timer_active    = 0;
2018                 timer_expires = jiffies;
2019         }
2020
2021         state = inet_sk_state_load(sp);
2022         if (state == TCP_LISTEN)
2023                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2024         else
2025                 /* Because we don't lock the socket,
2026                  * we might find a transient negative value.
2027                  */
2028                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2029                                       READ_ONCE(tp->copied_seq), 0);
2030
2031         seq_printf(seq,
2032                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2033                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2034                    i,
2035                    src->s6_addr32[0], src->s6_addr32[1],
2036                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2037                    dest->s6_addr32[0], dest->s6_addr32[1],
2038                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2039                    state,
2040                    READ_ONCE(tp->write_seq) - tp->snd_una,
2041                    rx_queue,
2042                    timer_active,
2043                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2044                    icsk->icsk_retransmits,
2045                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2046                    icsk->icsk_probes_out,
2047                    sock_i_ino(sp),
2048                    refcount_read(&sp->sk_refcnt), sp,
2049                    jiffies_to_clock_t(icsk->icsk_rto),
2050                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2051                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2052                    tcp_snd_cwnd(tp),
2053                    state == TCP_LISTEN ?
2054                         fastopenq->max_qlen :
2055                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2056                    );
2057 }
2058
2059 static void get_timewait6_sock(struct seq_file *seq,
2060                                struct inet_timewait_sock *tw, int i)
2061 {
2062         long delta = tw->tw_timer.expires - jiffies;
2063         const struct in6_addr *dest, *src;
2064         __u16 destp, srcp;
2065
2066         dest = &tw->tw_v6_daddr;
2067         src  = &tw->tw_v6_rcv_saddr;
2068         destp = ntohs(tw->tw_dport);
2069         srcp  = ntohs(tw->tw_sport);
2070
2071         seq_printf(seq,
2072                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2073                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2074                    i,
2075                    src->s6_addr32[0], src->s6_addr32[1],
2076                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2077                    dest->s6_addr32[0], dest->s6_addr32[1],
2078                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2079                    tw->tw_substate, 0, 0,
2080                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2081                    refcount_read(&tw->tw_refcnt), tw);
2082 }
2083
2084 static int tcp6_seq_show(struct seq_file *seq, void *v)
2085 {
2086         struct tcp_iter_state *st;
2087         struct sock *sk = v;
2088
2089         if (v == SEQ_START_TOKEN) {
2090                 seq_puts(seq,
2091                          "  sl  "
2092                          "local_address                         "
2093                          "remote_address                        "
2094                          "st tx_queue rx_queue tr tm->when retrnsmt"
2095                          "   uid  timeout inode\n");
2096                 goto out;
2097         }
2098         st = seq->private;
2099
2100         if (sk->sk_state == TCP_TIME_WAIT)
2101                 get_timewait6_sock(seq, v, st->num);
2102         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2103                 get_openreq6(seq, v, st->num);
2104         else
2105                 get_tcp6_sock(seq, v, st->num);
2106 out:
2107         return 0;
2108 }
2109
2110 static const struct seq_operations tcp6_seq_ops = {
2111         .show           = tcp6_seq_show,
2112         .start          = tcp_seq_start,
2113         .next           = tcp_seq_next,
2114         .stop           = tcp_seq_stop,
2115 };
2116
2117 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2118         .family         = AF_INET6,
2119 };
2120
2121 int __net_init tcp6_proc_init(struct net *net)
2122 {
2123         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2124                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2125                 return -ENOMEM;
2126         return 0;
2127 }
2128
2129 void tcp6_proc_exit(struct net *net)
2130 {
2131         remove_proc_entry("tcp6", net->proc_net);
2132 }
2133 #endif
2134
2135 struct proto tcpv6_prot = {
2136         .name                   = "TCPv6",
2137         .owner                  = THIS_MODULE,
2138         .close                  = tcp_close,
2139         .pre_connect            = tcp_v6_pre_connect,
2140         .connect                = tcp_v6_connect,
2141         .disconnect             = tcp_disconnect,
2142         .accept                 = inet_csk_accept,
2143         .ioctl                  = tcp_ioctl,
2144         .init                   = tcp_v6_init_sock,
2145         .destroy                = tcp_v4_destroy_sock,
2146         .shutdown               = tcp_shutdown,
2147         .setsockopt             = tcp_setsockopt,
2148         .getsockopt             = tcp_getsockopt,
2149         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2150         .keepalive              = tcp_set_keepalive,
2151         .recvmsg                = tcp_recvmsg,
2152         .sendmsg                = tcp_sendmsg,
2153         .sendpage               = tcp_sendpage,
2154         .backlog_rcv            = tcp_v6_do_rcv,
2155         .release_cb             = tcp_release_cb,
2156         .hash                   = inet6_hash,
2157         .unhash                 = inet_unhash,
2158         .get_port               = inet_csk_get_port,
2159         .put_port               = inet_put_port,
2160 #ifdef CONFIG_BPF_SYSCALL
2161         .psock_update_sk_prot   = tcp_bpf_update_proto,
2162 #endif
2163         .enter_memory_pressure  = tcp_enter_memory_pressure,
2164         .leave_memory_pressure  = tcp_leave_memory_pressure,
2165         .stream_memory_free     = tcp_stream_memory_free,
2166         .sockets_allocated      = &tcp_sockets_allocated,
2167
2168         .memory_allocated       = &tcp_memory_allocated,
2169         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2170
2171         .memory_pressure        = &tcp_memory_pressure,
2172         .orphan_count           = &tcp_orphan_count,
2173         .sysctl_mem             = sysctl_tcp_mem,
2174         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2175         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2176         .max_header             = MAX_TCP_HEADER,
2177         .obj_size               = sizeof(struct tcp6_sock),
2178         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2179         .twsk_prot              = &tcp6_timewait_sock_ops,
2180         .rsk_prot               = &tcp6_request_sock_ops,
2181         .h.hashinfo             = NULL,
2182         .no_autobind            = true,
2183         .diag_destroy           = tcp_abort,
2184 };
2185 EXPORT_SYMBOL_GPL(tcpv6_prot);
2186
2187 static const struct inet6_protocol tcpv6_protocol = {
2188         .handler        =       tcp_v6_rcv,
2189         .err_handler    =       tcp_v6_err,
2190         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2191 };
2192
2193 static struct inet_protosw tcpv6_protosw = {
2194         .type           =       SOCK_STREAM,
2195         .protocol       =       IPPROTO_TCP,
2196         .prot           =       &tcpv6_prot,
2197         .ops            =       &inet6_stream_ops,
2198         .flags          =       INET_PROTOSW_PERMANENT |
2199                                 INET_PROTOSW_ICSK,
2200 };
2201
2202 static int __net_init tcpv6_net_init(struct net *net)
2203 {
2204         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2205                                     SOCK_RAW, IPPROTO_TCP, net);
2206 }
2207
2208 static void __net_exit tcpv6_net_exit(struct net *net)
2209 {
2210         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2211 }
2212
2213 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2214 {
2215         tcp_twsk_purge(net_exit_list, AF_INET6);
2216 }
2217
2218 static struct pernet_operations tcpv6_net_ops = {
2219         .init       = tcpv6_net_init,
2220         .exit       = tcpv6_net_exit,
2221         .exit_batch = tcpv6_net_exit_batch,
2222 };
2223
2224 int __init tcpv6_init(void)
2225 {
2226         int ret;
2227
2228         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2229         if (ret)
2230                 goto out;
2231
2232         /* register inet6 protocol */
2233         ret = inet6_register_protosw(&tcpv6_protosw);
2234         if (ret)
2235                 goto out_tcpv6_protocol;
2236
2237         ret = register_pernet_subsys(&tcpv6_net_ops);
2238         if (ret)
2239                 goto out_tcpv6_protosw;
2240
2241         ret = mptcpv6_init();
2242         if (ret)
2243                 goto out_tcpv6_pernet_subsys;
2244
2245 out:
2246         return ret;
2247
2248 out_tcpv6_pernet_subsys:
2249         unregister_pernet_subsys(&tcpv6_net_ops);
2250 out_tcpv6_protosw:
2251         inet6_unregister_protosw(&tcpv6_protosw);
2252 out_tcpv6_protocol:
2253         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2254         goto out;
2255 }
2256
2257 void tcpv6_exit(void)
2258 {
2259         unregister_pernet_subsys(&tcpv6_net_ops);
2260         inet6_unregister_protosw(&tcpv6_protosw);
2261         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2262 }