dccp/tcp: Reset saddr on failure after inet6?_hash_connect().
net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
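
/* Illustrative sketch (not part of this file, hence under #if 0): the helper
 * above works because struct tcp6_sock places its struct ipv6_pinfo at a
 * fixed offset from the end of the object, so the address can be computed
 * without dereferencing inet_sk(sk)->pinet6. A minimal userspace analogue of
 * the same trick, with hypothetical outer/inner types:
 */
#if 0
#include <stdio.h>

struct inner {
        int value;
};

struct outer {
        long header[4];
        struct inner inner;     /* trailing member, like ipv6_pinfo */
};

/* offset = sizeof(outer) - sizeof(inner), mirroring tcp_inet6_sk() */
static struct inner *outer_inner(struct outer *o)
{
        unsigned int offset = sizeof(struct outer) - sizeof(struct inner);

        return (struct inner *)(((unsigned char *)o) + offset);
}

int main(void)
{
        struct outer o = { .inner = { .value = 42 } };

        printf("%d\n", outer_inner(&o)->value);        /* prints 42 */
        return 0;
}
#endif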

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}
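
/* Conceptual sketch (not part of this file, hence under #if 0): functions like
 * secure_tcpv6_seq() derive the initial sequence number from a keyed hash over
 * the connection 4-tuple plus a clock component, so ISNs are unpredictable per
 * connection yet still advance over time (cf. RFC 6528). A toy userspace
 * illustration; toy_hash() is a stand-in, NOT the kernel's keyed hash:
 */
#if 0
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <time.h>

/* Toy keyed mix based on FNV-1a; for illustration only. */
static uint32_t toy_hash(const uint8_t *data, size_t len, uint64_t key)
{
        uint64_t h = 1469598103934665603ull ^ key;
        size_t i;

        for (i = 0; i < len; i++)
                h = (h ^ data[i]) * 1099511628211ull;
        return (uint32_t)(h ^ (h >> 32));
}

static uint32_t toy_tcpv6_seq(const uint8_t saddr[16], const uint8_t daddr[16],
                              uint16_t sport, uint16_t dport, uint64_t secret)
{
        uint8_t buf[36];

        memcpy(buf, saddr, 16);
        memcpy(buf + 16, daddr, 16);
        memcpy(buf + 32, &sport, sizeof(sport));
        memcpy(buf + 34, &dport, sizeof(dport));

        /* keyed hash of the 4-tuple plus a coarse clock component */
        return toy_hash(buf, sizeof(buf), secret) +
               ((uint32_t)time(NULL) << 6);
}
#endif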

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6_txoptions *opt;
        struct dst_entry *dst;
        struct flowi6 fl6;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                        WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (!saddr) {
                struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
                struct in6_addr prev_v6_rcv_saddr;

                if (icsk->icsk_bind2_hash) {
                        prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
                                                                     sk, net, inet->inet_num);
                        prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
                }
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;

                if (prev_addr_hashbucket) {
                        err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
                        if (err) {
                                sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
                                goto failure;
                        }
                }
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}
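
/* Usage sketch (not part of this file, hence under #if 0): tcp_v6_connect()
 * above is what ultimately services a userspace connect() on an AF_INET6 TCP
 * socket. A minimal illustrative client; the address and port are
 * placeholders. An IPv4-mapped destination ("::ffff:a.b.c.d") would take the
 * IPV6_ADDR_MAPPED branch above and fall back to tcp_v4_connect():
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
        struct sockaddr_in6 dst;
        int fd = socket(AF_INET6, SOCK_STREAM, 0);

        if (fd < 0)
                return 1;

        memset(&dst, 0, sizeof(dst));
        dst.sin6_family = AF_INET6;
        dst.sin6_port = htons(80);                      /* placeholder port */
        inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr); /* placeholder */

        if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
                perror("connect");
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}
#endif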

static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* check if this ICMP message allows revert of backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
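
/* Usage sketch (not part of this file, hence under #if 0): the parser above
 * services the TCP_MD5SIG/TCP_MD5SIG_EXT socket options (RFC 2385 TCP MD5
 * signatures). A minimal illustrative userspace call installing a key for one
 * IPv6 peer; the peer address and key below are placeholders:
 */
#if 0
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/tcp.h>          /* struct tcp_md5sig, TCP_MD5SIG */

static int install_md5_key(int fd, const char *peer, const char *key)
{
        struct tcp_md5sig md5;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&md5.tcpm_addr;

        memset(&md5, 0, sizeof(md5));
        sin6->sin6_family = AF_INET6;
        if (inet_pton(AF_INET6, peer, &sin6->sin6_addr) != 1)
                return -1;

        md5.tcpm_keylen = strlen(key);  /* must be <= TCP_MD5SIG_MAXKEYLEN */
        memcpy(md5.tcpm_key, key, md5.tcpm_keylen);

        /* Both endpoints must install the same key before connecting;
         * segments then carry an MD5 option computed over the pseudo-header,
         * TCP header and payload (see the hashing helpers below).
         */
        return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}
#endif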

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
                        mark = sk->sk_mark;
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
                /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
                skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when it is for an RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        u32 txhash = 0;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the md5 hash of the
                 * found key; no RST is generated if the md5 hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
                                            NULL, 0, &ipv6h->saddr, th->source,
                                            &ipv6h->daddr, ntohs(th->source),
                                            dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                        txhash = sk->sk_hash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                        txhash = inet_twsk(sk)->tw_txhash;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority, u32 txhash)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
                        tw->tw_txhash);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
                        tcp_rsk(req)->txhash);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 tcp
                 * socket worked with the IPv6 icsk.icsk_af_ops.
                 * Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever, but we
           do one more thing here: we reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone(ireq->pktopts,
                                                      sk_gfp_mask(sk, GFP_ATOMIC));
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions) {
                                tcp_v6_restore_cb(newnp->pktoptions);
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
        } else {
1416                         /* This code path should only be executed in the
1417                          * syncookie case only
1418                          */
1419                         bh_unlock_sock(newsk);
1420                         sock_put(newsk);
1421                         newsk = NULL;
1422                 }
1423         }
1424
1425         return newsk;
1426
1427 out_overflow:
1428         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1429 out_nonewsk:
1430         dst_release(dst);
1431 out:
1432         tcp_listendrop(sk);
1433         return NULL;
1434 }
1435
1436 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1437                                                            u32));
1438 /* The socket must have it's spinlock held when we get
1439  * here, unless it is a TCP_LISTEN socket.
1440  *
1441  * We have a potential double-lock case here, so even when
1442  * doing backlog processing we use the BH locking scheme.
1443  * This is because we cannot sleep with the original spinlock
1444  * held.
1445  */
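/* Caller-side sketch of the BH locking scheme described above; this mirrors
 * the pattern used by tcp_v6_rcv() later in this file and is shown only for
 * illustration:
 *
 *	bh_lock_sock_nested(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = tcp_v6_do_rcv(sk, skb);		// process directly
 *	else if (tcp_add_backlog(sk, skb, &reason))	// else defer to owner
 *		goto discard_and_relse;
 *	bh_unlock_sock(sk);
 */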
1446 INDIRECT_CALLABLE_SCOPE
1447 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1448 {
1449         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1450         struct sk_buff *opt_skb = NULL;
1451         enum skb_drop_reason reason;
1452         struct tcp_sock *tp;
1453
1454         /* Imagine: the socket is IPv6, but an IPv4 packet arrives,
1455            goes to the IPv4 receive handler and is backlogged.
1456            From the backlog it always ends up here. Kerboom...
1457            Fortunately, tcp_rcv_established and rcv_established
1458            handle them correctly, but that is not the case with
1459            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1460          */
1461
1462         if (skb->protocol == htons(ETH_P_IP))
1463                 return tcp_v4_do_rcv(sk, skb);
1464
1465         /*
1466          *      socket locking is here for SMP purposes as backlog rcv
1467          *      is currently called with bh processing disabled.
1468          */
1469
1470         /* Do Stevens' IPV6_PKTOPTIONS.
1471
1472            Yes, guys, this is the only place in our code where we
1473            can handle it without affecting IPv4.
1474            The rest of the code is protocol independent,
1475            and I do not like the idea of uglifying IPv4.
1476
1477            Actually, the whole idea behind IPV6_PKTOPTIONS
1478            does not look very well thought out. For now we latch
1479            the options received in the last packet enqueued
1480            by tcp. Feel free to propose a better solution.
1481                                                --ANK (980728)
1482          */
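        /* Userspace view of the latching described above; a hedged,
         * illustrative sketch (not part of the build). The latched options of
         * the most recently queued segment are returned as one cmsg-formatted
         * blob:
         *
         *	int on = 1;
         *	char cbuf[256];
         *	socklen_t clen = sizeof(cbuf);
         *
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *	read(fd, buf, sizeof(buf));
         *	getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &clen);
         */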
1483         if (np->rxopt.all)
1484                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1485
1486         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1487         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1488                 struct dst_entry *dst;
1489
1490                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1491                                                 lockdep_sock_is_held(sk));
1492
1493                 sock_rps_save_rxhash(sk, skb);
1494                 sk_mark_napi_id(sk, skb);
1495                 if (dst) {
1496                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1497                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1498                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1499                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1500                                 dst_release(dst);
1501                         }
1502                 }
1503
1504                 tcp_rcv_established(sk, skb);
1505                 if (opt_skb)
1506                         goto ipv6_pktoptions;
1507                 return 0;
1508         }
1509
1510         if (tcp_checksum_complete(skb))
1511                 goto csum_err;
1512
1513         if (sk->sk_state == TCP_LISTEN) {
1514                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1515
1516                 if (!nsk)
1517                         goto discard;
1518
1519                 if (nsk != sk) {
1520                         if (tcp_child_process(sk, nsk, skb))
1521                                 goto reset;
1522                         if (opt_skb)
1523                                 __kfree_skb(opt_skb);
1524                         return 0;
1525                 }
1526         } else
1527                 sock_rps_save_rxhash(sk, skb);
1528
1529         if (tcp_rcv_state_process(sk, skb))
1530                 goto reset;
1531         if (opt_skb)
1532                 goto ipv6_pktoptions;
1533         return 0;
1534
1535 reset:
1536         tcp_v6_send_reset(sk, skb);
1537 discard:
1538         if (opt_skb)
1539                 __kfree_skb(opt_skb);
1540         kfree_skb_reason(skb, reason);
1541         return 0;
1542 csum_err:
1543         reason = SKB_DROP_REASON_TCP_CSUM;
1544         trace_tcp_bad_csum(skb);
1545         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1546         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1547         goto discard;
1548
1550 ipv6_pktoptions:
1551         /* You may ask, what is this for?
1552
1553            1. skb was enqueued by tcp.
1554            2. skb was added to the tail of the read queue, not out of order.
1555            3. The socket is not in a passive state.
1556            4. Finally, it really contains options that the user wants to receive.
1557          */
1558         tp = tcp_sk(sk);
1559         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1560             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1561                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1562                         np->mcast_oif = tcp_v6_iif(opt_skb);
1563                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1564                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1565                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1566                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1567                 if (np->repflow)
1568                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1569                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1570                         skb_set_owner_r(opt_skb, sk);
1571                         tcp_v6_restore_cb(opt_skb);
1572                         opt_skb = xchg(&np->pktoptions, opt_skb);
1573                 } else {
1574                         __kfree_skb(opt_skb);
1575                         opt_skb = xchg(&np->pktoptions, NULL);
1576                 }
1577         }
1578
1579         consume_skb(opt_skb);
1580         return 0;
1581 }
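/* Note on the xchg() calls in the ipv6_pktoptions block above: each one
 * installs the new value of np->pktoptions (the restored clone, or NULL) and
 * hands back the previous skb in a single atomic step, so the old skb is
 * freed exactly once even though np->pktoptions is read locklessly elsewhere.
 * Logically (illustrative, ignoring atomicity):
 *
 *	old = np->pktoptions;
 *	np->pktoptions = new;
 *	consume_skb(old);
 */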
1582
1583 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1584                            const struct tcphdr *th)
1585 {
1586         /* This is tricky: we move IP6CB to its correct location inside
1587          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1588          * _decode_session6() uses IP6CB().
1589          * barrier() makes sure the compiler won't play aliasing games.
1590          */
1591         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1592                 sizeof(struct inet6_skb_parm));
1593         barrier();
1594
1595         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1596         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1597                                     skb->len - th->doff*4);
1598         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1599         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1600         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1601         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1602         TCP_SKB_CB(skb)->sacked = 0;
1603         TCP_SKB_CB(skb)->has_rxtstamp =
1604                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1605 }
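/* Worked example for the end_seq computation above (illustrative numbers):
 * a segment with seq = 1000, a 20-byte header (doff = 5), 100 bytes of
 * payload (so skb->len = 120) and FIN set gives
 *
 *	end_seq = 1000 + 0 (syn) + 1 (fin) + 120 - 20 = 1101
 *
 * i.e. the payload advances the sequence space by 100 and the FIN by one.
 */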
1606
1607 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1608 {
1609         enum skb_drop_reason drop_reason;
1610         int sdif = inet6_sdif(skb);
1611         int dif = inet6_iif(skb);
1612         const struct tcphdr *th;
1613         const struct ipv6hdr *hdr;
1614         bool refcounted;
1615         struct sock *sk;
1616         int ret;
1617         struct net *net = dev_net(skb->dev);
1618
1619         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1620         if (skb->pkt_type != PACKET_HOST)
1621                 goto discard_it;
1622
1623         /*
1624          *      Count it even if it's bad.
1625          */
1626         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1627
1628         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1629                 goto discard_it;
1630
1631         th = (const struct tcphdr *)skb->data;
1632
1633         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1634                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1635                 goto bad_packet;
1636         }
1637         if (!pskb_may_pull(skb, th->doff*4))
1638                 goto discard_it;
1639
1640         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1641                 goto csum_error;
1642
1643         th = (const struct tcphdr *)skb->data;
1644         hdr = ipv6_hdr(skb);
1645
1646 lookup:
1647         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1648                                 th->source, th->dest, inet6_iif(skb), sdif,
1649                                 &refcounted);
1650         if (!sk)
1651                 goto no_tcp_socket;
1652
1653 process:
1654         if (sk->sk_state == TCP_TIME_WAIT)
1655                 goto do_time_wait;
1656
1657         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1658                 struct request_sock *req = inet_reqsk(sk);
1659                 bool req_stolen = false;
1660                 struct sock *nsk;
1661
1662                 sk = req->rsk_listener;
1663                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1664                                                    &hdr->saddr, &hdr->daddr,
1665                                                    AF_INET6, dif, sdif);
1666                 if (drop_reason) {
1667                         sk_drops_add(sk, skb);
1668                         reqsk_put(req);
1669                         goto discard_it;
1670                 }
1671                 if (tcp_checksum_complete(skb)) {
1672                         reqsk_put(req);
1673                         goto csum_error;
1674                 }
1675                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1676                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1677                         if (!nsk) {
1678                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1679                                 goto lookup;
1680                         }
1681                         sk = nsk;
1682                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1683                          * before returning.
1684                          */
1685                 } else {
1686                         sock_hold(sk);
1687                 }
1688                 refcounted = true;
1689                 nsk = NULL;
1690                 if (!tcp_filter(sk, skb)) {
1691                         th = (const struct tcphdr *)skb->data;
1692                         hdr = ipv6_hdr(skb);
1693                         tcp_v6_fill_cb(skb, hdr, th);
1694                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1695                 } else {
1696                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1697                 }
1698                 if (!nsk) {
1699                         reqsk_put(req);
1700                         if (req_stolen) {
1701                         /* Another CPU got exclusive access to req
1702                          * and created a full-blown socket.
1703                                  * Try to feed this packet to this socket
1704                                  * instead of discarding it.
1705                                  */
1706                                 tcp_v6_restore_cb(skb);
1707                                 sock_put(sk);
1708                                 goto lookup;
1709                         }
1710                         goto discard_and_relse;
1711                 }
1712                 if (nsk == sk) {
1713                         reqsk_put(req);
1714                         tcp_v6_restore_cb(skb);
1715                 } else if (tcp_child_process(sk, nsk, skb)) {
1716                         tcp_v6_send_reset(nsk, skb);
1717                         goto discard_and_relse;
1718                 } else {
1719                         sock_put(sk);
1720                         return 0;
1721                 }
1722         }
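        /* Summary of the TCP_NEW_SYN_RECV outcomes above (descriptive only):
         *
         *	tcp_check_req() == NULL, req_stolen  -> another CPU completed
         *	                                        the handshake; retry lookup
         *	tcp_check_req() == NULL, !req_stolen -> drop the packet
         *	nsk == sk (the listener)             -> fall through to normal rcv
         *	nsk != sk (a new child)              -> tcp_child_process() feeds
         *	                                        the packet to the child
         */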
1723
1724         if (static_branch_unlikely(&ip6_min_hopcount)) {
1725                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1726                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1727                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1728                         goto discard_and_relse;
1729                 }
1730         }
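        /* The hop-limit check above is armed from userspace with the
         * IPV6_MINHOPCOUNT socket option (a GTSM-style filter). Illustrative
         * sketch, not part of the build:
         *
         *	int minhop = 255;	// accept only directly connected peers
         *
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *		   &minhop, sizeof(minhop));
         */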
1731
1732         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1733                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1734                 goto discard_and_relse;
1735         }
1736
1737         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1738                                            AF_INET6, dif, sdif);
1739         if (drop_reason)
1740                 goto discard_and_relse;
1741
1742         if (tcp_filter(sk, skb)) {
1743                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1744                 goto discard_and_relse;
1745         }
1746         th = (const struct tcphdr *)skb->data;
1747         hdr = ipv6_hdr(skb);
1748         tcp_v6_fill_cb(skb, hdr, th);
1749
1750         skb->dev = NULL;
1751
1752         if (sk->sk_state == TCP_LISTEN) {
1753                 ret = tcp_v6_do_rcv(sk, skb);
1754                 goto put_and_return;
1755         }
1756
1757         sk_incoming_cpu_update(sk);
1758
1759         bh_lock_sock_nested(sk);
1760         tcp_segs_in(tcp_sk(sk), skb);
1761         ret = 0;
1762         if (!sock_owned_by_user(sk)) {
1763                 ret = tcp_v6_do_rcv(sk, skb);
1764         } else {
1765                 if (tcp_add_backlog(sk, skb, &drop_reason))
1766                         goto discard_and_relse;
1767         }
1768         bh_unlock_sock(sk);
1769 put_and_return:
1770         if (refcounted)
1771                 sock_put(sk);
1772         return ret ? -1 : 0;
1773
1774 no_tcp_socket:
1775         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1776         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1777                 goto discard_it;
1778
1779         tcp_v6_fill_cb(skb, hdr, th);
1780
1781         if (tcp_checksum_complete(skb)) {
1782 csum_error:
1783                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1784                 trace_tcp_bad_csum(skb);
1785                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1786 bad_packet:
1787                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1788         } else {
1789                 tcp_v6_send_reset(NULL, skb);
1790         }
1791
1792 discard_it:
1793         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1794         kfree_skb_reason(skb, drop_reason);
1795         return 0;
1796
1797 discard_and_relse:
1798         sk_drops_add(sk, skb);
1799         if (refcounted)
1800                 sock_put(sk);
1801         goto discard_it;
1802
1803 do_time_wait:
1804         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1805                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1806                 inet_twsk_put(inet_twsk(sk));
1807                 goto discard_it;
1808         }
1809
1810         tcp_v6_fill_cb(skb, hdr, th);
1811
1812         if (tcp_checksum_complete(skb)) {
1813                 inet_twsk_put(inet_twsk(sk));
1814                 goto csum_error;
1815         }
1816
1817         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1818         case TCP_TW_SYN:
1819         {
1820                 struct sock *sk2;
1821
1822                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1823                                             skb, __tcp_hdrlen(th),
1824                                             &ipv6_hdr(skb)->saddr, th->source,
1825                                             &ipv6_hdr(skb)->daddr,
1826                                             ntohs(th->dest),
1827                                             tcp_v6_iif_l3_slave(skb),
1828                                             sdif);
1829                 if (sk2) {
1830                         struct inet_timewait_sock *tw = inet_twsk(sk);
1831                         inet_twsk_deschedule_put(tw);
1832                         sk = sk2;
1833                         tcp_v6_restore_cb(skb);
1834                         refcounted = false;
1835                         goto process;
1836                 }
1837         }
1838                 /* to ACK */
1839                 fallthrough;
1840         case TCP_TW_ACK:
1841                 tcp_v6_timewait_ack(sk, skb);
1842                 break;
1843         case TCP_TW_RST:
1844                 tcp_v6_send_reset(sk, skb);
1845                 inet_twsk_deschedule_put(inet_twsk(sk));
1846                 goto discard_it;
1847         case TCP_TW_SUCCESS:
1848                 ;
1849         }
1850         goto discard_it;
1851 }
1852
1853 void tcp_v6_early_demux(struct sk_buff *skb)
1854 {
1855         struct net *net = dev_net(skb->dev);
1856         const struct ipv6hdr *hdr;
1857         const struct tcphdr *th;
1858         struct sock *sk;
1859
1860         if (skb->pkt_type != PACKET_HOST)
1861                 return;
1862
1863         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1864                 return;
1865
1866         hdr = ipv6_hdr(skb);
1867         th = tcp_hdr(skb);
1868
1869         if (th->doff < sizeof(struct tcphdr) / 4)
1870                 return;
1871
1872         /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1873         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1874                                         &hdr->saddr, th->source,
1875                                         &hdr->daddr, ntohs(th->dest),
1876                                         inet6_iif(skb), inet6_sdif(skb));
1877         if (sk) {
1878                 skb->sk = sk;
1879                 skb->destructor = sock_edemux;
1880                 if (sk_fullsock(sk)) {
1881                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1882
1883                         if (dst)
1884                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1885                         if (dst &&
1886                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1887                                 skb_dst_set_noref(skb, dst);
1888                 }
1889         }
1890 }
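/* What the early demux above buys us (descriptive): by attaching the
 * established socket, and when still valid its cached dst, to the skb before
 * the IP layer's routing decision, both the socket lookup in tcp_v6_rcv()
 * (via skb_steal_sock()) and the route lookup in ip6_rcv_finish() are
 * skipped for this packet.
 */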
1891
1892 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1893         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1894         .twsk_unique    = tcp_twsk_unique,
1895         .twsk_destructor = tcp_twsk_destructor,
1896 };
1897
1898 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1899 {
1900         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1901 }
1902
1903 const struct inet_connection_sock_af_ops ipv6_specific = {
1904         .queue_xmit        = inet6_csk_xmit,
1905         .send_check        = tcp_v6_send_check,
1906         .rebuild_header    = inet6_sk_rebuild_header,
1907         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1908         .conn_request      = tcp_v6_conn_request,
1909         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1910         .net_header_len    = sizeof(struct ipv6hdr),
1911         .net_frag_header_len = sizeof(struct frag_hdr),
1912         .setsockopt        = ipv6_setsockopt,
1913         .getsockopt        = ipv6_getsockopt,
1914         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1915         .sockaddr_len      = sizeof(struct sockaddr_in6),
1916         .mtu_reduced       = tcp_v6_mtu_reduced,
1917 };
1918
1919 #ifdef CONFIG_TCP_MD5SIG
1920 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1921         .md5_lookup     =       tcp_v6_md5_lookup,
1922         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1923         .md5_parse      =       tcp_v6_parse_md5_keys,
1924 };
1925 #endif
1926
1927 /*
1928  *      TCP over IPv4 via INET6 API
1929  */
1930 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1931         .queue_xmit        = ip_queue_xmit,
1932         .send_check        = tcp_v4_send_check,
1933         .rebuild_header    = inet_sk_rebuild_header,
1934         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1935         .conn_request      = tcp_v6_conn_request,
1936         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1937         .net_header_len    = sizeof(struct iphdr),
1938         .setsockopt        = ipv6_setsockopt,
1939         .getsockopt        = ipv6_getsockopt,
1940         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1941         .sockaddr_len      = sizeof(struct sockaddr_in6),
1942         .mtu_reduced       = tcp_v4_mtu_reduced,
1943 };
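/* ipv6_mapped (and, with MD5 enabled, tcp_sock_ipv6_mapped_specific below)
 * is switched in by tcp_v6_connect() when an AF_INET6 socket connects to a
 * v4-mapped peer. Illustrative userspace trigger, a sketch only:
 *
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port   = htons(80) };
 *
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 */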
1944
1945 #ifdef CONFIG_TCP_MD5SIG
1946 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1947         .md5_lookup     =       tcp_v4_md5_lookup,
1948         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1949         .md5_parse      =       tcp_v6_parse_md5_keys,
1950 };
1951 #endif
1952
1953 /* NOTE: A lot of things are set to zero explicitly by the call to
1954  *       sk_alloc(), so they need not be done here.
1955  */
1956 static int tcp_v6_init_sock(struct sock *sk)
1957 {
1958         struct inet_connection_sock *icsk = inet_csk(sk);
1959
1960         tcp_init_sock(sk);
1961
1962         icsk->icsk_af_ops = &ipv6_specific;
1963
1964 #ifdef CONFIG_TCP_MD5SIG
1965         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1966 #endif
1967
1968         return 0;
1969 }
1970
1971 static void tcp_v6_destroy_sock(struct sock *sk)
1972 {
1973         tcp_v4_destroy_sock(sk);
1974         inet6_destroy_sock(sk);
1975 }
1976
1977 #ifdef CONFIG_PROC_FS
1978 /* Proc filesystem TCPv6 sock list dumping. */
1979 static void get_openreq6(struct seq_file *seq,
1980                          const struct request_sock *req, int i)
1981 {
1982         long ttd = req->rsk_timer.expires - jiffies;
1983         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1984         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1985
1986         if (ttd < 0)
1987                 ttd = 0;
1988
1989         seq_printf(seq,
1990                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1991                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1992                    i,
1993                    src->s6_addr32[0], src->s6_addr32[1],
1994                    src->s6_addr32[2], src->s6_addr32[3],
1995                    inet_rsk(req)->ir_num,
1996                    dest->s6_addr32[0], dest->s6_addr32[1],
1997                    dest->s6_addr32[2], dest->s6_addr32[3],
1998                    ntohs(inet_rsk(req)->ir_rmt_port),
1999                    TCP_SYN_RECV,
2000                    0, 0, /* could print option size, but that is af dependent. */
2001                    1,   /* timers active (only the expire timer) */
2002                    jiffies_to_clock_t(ttd),
2003                    req->num_timeout,
2004                    from_kuid_munged(seq_user_ns(seq),
2005                                     sock_i_uid(req->rsk_listener)),
2006                    0,  /* non standard timer */
2007                    0, /* open_requests have no inode */
2008                    0, req);
2009 }
2010
2011 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2012 {
2013         const struct in6_addr *dest, *src;
2014         __u16 destp, srcp;
2015         int timer_active;
2016         unsigned long timer_expires;
2017         const struct inet_sock *inet = inet_sk(sp);
2018         const struct tcp_sock *tp = tcp_sk(sp);
2019         const struct inet_connection_sock *icsk = inet_csk(sp);
2020         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2021         int rx_queue;
2022         int state;
2023
2024         dest  = &sp->sk_v6_daddr;
2025         src   = &sp->sk_v6_rcv_saddr;
2026         destp = ntohs(inet->inet_dport);
2027         srcp  = ntohs(inet->inet_sport);
2028
2029         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2030             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2031             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2032                 timer_active    = 1;
2033                 timer_expires   = icsk->icsk_timeout;
2034         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2035                 timer_active    = 4;
2036                 timer_expires   = icsk->icsk_timeout;
2037         } else if (timer_pending(&sp->sk_timer)) {
2038                 timer_active    = 2;
2039                 timer_expires   = sp->sk_timer.expires;
2040         } else {
2041                 timer_active    = 0;
2042                 timer_expires = jiffies;
2043         }
2044
2045         state = inet_sk_state_load(sp);
2046         if (state == TCP_LISTEN)
2047                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2048         else
2049                 /* Because we don't lock the socket,
2050                  * we might find a transient negative value.
2051                  */
2052                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2053                                       READ_ONCE(tp->copied_seq), 0);
2054
2055         seq_printf(seq,
2056                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2057                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2058                    i,
2059                    src->s6_addr32[0], src->s6_addr32[1],
2060                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2061                    dest->s6_addr32[0], dest->s6_addr32[1],
2062                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2063                    state,
2064                    READ_ONCE(tp->write_seq) - tp->snd_una,
2065                    rx_queue,
2066                    timer_active,
2067                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2068                    icsk->icsk_retransmits,
2069                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2070                    icsk->icsk_probes_out,
2071                    sock_i_ino(sp),
2072                    refcount_read(&sp->sk_refcnt), sp,
2073                    jiffies_to_clock_t(icsk->icsk_rto),
2074                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2075                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2076                    tcp_snd_cwnd(tp),
2077                    state == TCP_LISTEN ?
2078                         fastopenq->max_qlen :
2079                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2080                    );
2081 }
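/* Hand-constructed sample of the line emitted above for a socket listening
 * on [::]:8080; the values are illustrative, not captured output, and the
 * single output line is wrapped here for readability:
 *
 *	0: 00000000000000000000000000000000:1F90 00000000000000000000000000000000:0000
 *	   0A 00000000:00000000 00:00000000 00000000  1000 0 12345 1 0000000000000000
 *	   100 0 0 10 0
 *
 * 0x1F90 is port 8080, 0A is TCP_LISTEN, and the trailing fields are rto,
 * ato, quick/pingpong, snd_cwnd and (for a listener) fastopenq->max_qlen.
 */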
2082
2083 static void get_timewait6_sock(struct seq_file *seq,
2084                                struct inet_timewait_sock *tw, int i)
2085 {
2086         long delta = tw->tw_timer.expires - jiffies;
2087         const struct in6_addr *dest, *src;
2088         __u16 destp, srcp;
2089
2090         dest = &tw->tw_v6_daddr;
2091         src  = &tw->tw_v6_rcv_saddr;
2092         destp = ntohs(tw->tw_dport);
2093         srcp  = ntohs(tw->tw_sport);
2094
2095         seq_printf(seq,
2096                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2097                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2098                    i,
2099                    src->s6_addr32[0], src->s6_addr32[1],
2100                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2101                    dest->s6_addr32[0], dest->s6_addr32[1],
2102                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2103                    tw->tw_substate, 0, 0,
2104                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2105                    refcount_read(&tw->tw_refcnt), tw);
2106 }
2107
2108 static int tcp6_seq_show(struct seq_file *seq, void *v)
2109 {
2110         struct tcp_iter_state *st;
2111         struct sock *sk = v;
2112
2113         if (v == SEQ_START_TOKEN) {
2114                 seq_puts(seq,
2115                          "  sl  "
2116                          "local_address                         "
2117                          "remote_address                        "
2118                          "st tx_queue rx_queue tr tm->when retrnsmt"
2119                          "   uid  timeout inode\n");
2120                 goto out;
2121         }
2122         st = seq->private;
2123
2124         if (sk->sk_state == TCP_TIME_WAIT)
2125                 get_timewait6_sock(seq, v, st->num);
2126         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2127                 get_openreq6(seq, v, st->num);
2128         else
2129                 get_tcp6_sock(seq, v, st->num);
2130 out:
2131         return 0;
2132 }
2133
2134 static const struct seq_operations tcp6_seq_ops = {
2135         .show           = tcp6_seq_show,
2136         .start          = tcp_seq_start,
2137         .next           = tcp_seq_next,
2138         .stop           = tcp_seq_stop,
2139 };
2140
2141 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2142         .family         = AF_INET6,
2143 };
2144
2145 int __net_init tcp6_proc_init(struct net *net)
2146 {
2147         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2148                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2149                 return -ENOMEM;
2150         return 0;
2151 }
2152
2153 void tcp6_proc_exit(struct net *net)
2154 {
2155         remove_proc_entry("tcp6", net->proc_net);
2156 }
2157 #endif
2158
2159 struct proto tcpv6_prot = {
2160         .name                   = "TCPv6",
2161         .owner                  = THIS_MODULE,
2162         .close                  = tcp_close,
2163         .pre_connect            = tcp_v6_pre_connect,
2164         .connect                = tcp_v6_connect,
2165         .disconnect             = tcp_disconnect,
2166         .accept                 = inet_csk_accept,
2167         .ioctl                  = tcp_ioctl,
2168         .init                   = tcp_v6_init_sock,
2169         .destroy                = tcp_v6_destroy_sock,
2170         .shutdown               = tcp_shutdown,
2171         .setsockopt             = tcp_setsockopt,
2172         .getsockopt             = tcp_getsockopt,
2173         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2174         .keepalive              = tcp_set_keepalive,
2175         .recvmsg                = tcp_recvmsg,
2176         .sendmsg                = tcp_sendmsg,
2177         .sendpage               = tcp_sendpage,
2178         .backlog_rcv            = tcp_v6_do_rcv,
2179         .release_cb             = tcp_release_cb,
2180         .hash                   = inet6_hash,
2181         .unhash                 = inet_unhash,
2182         .get_port               = inet_csk_get_port,
2183         .put_port               = inet_put_port,
2184 #ifdef CONFIG_BPF_SYSCALL
2185         .psock_update_sk_prot   = tcp_bpf_update_proto,
2186 #endif
2187         .enter_memory_pressure  = tcp_enter_memory_pressure,
2188         .leave_memory_pressure  = tcp_leave_memory_pressure,
2189         .stream_memory_free     = tcp_stream_memory_free,
2190         .sockets_allocated      = &tcp_sockets_allocated,
2191
2192         .memory_allocated       = &tcp_memory_allocated,
2193         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2194
2195         .memory_pressure        = &tcp_memory_pressure,
2196         .orphan_count           = &tcp_orphan_count,
2197         .sysctl_mem             = sysctl_tcp_mem,
2198         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2199         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2200         .max_header             = MAX_TCP_HEADER,
2201         .obj_size               = sizeof(struct tcp6_sock),
2202         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2203         .twsk_prot              = &tcp6_timewait_sock_ops,
2204         .rsk_prot               = &tcp6_request_sock_ops,
2205         .h.hashinfo             = NULL,
2206         .no_autobind            = true,
2207         .diag_destroy           = tcp_abort,
2208 };
2209 EXPORT_SYMBOL_GPL(tcpv6_prot);
2210
2211 static const struct inet6_protocol tcpv6_protocol = {
2212         .handler        =       tcp_v6_rcv,
2213         .err_handler    =       tcp_v6_err,
2214         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2215 };
2216
2217 static struct inet_protosw tcpv6_protosw = {
2218         .type           =       SOCK_STREAM,
2219         .protocol       =       IPPROTO_TCP,
2220         .prot           =       &tcpv6_prot,
2221         .ops            =       &inet6_stream_ops,
2222         .flags          =       INET_PROTOSW_PERMANENT |
2223                                 INET_PROTOSW_ICSK,
2224 };
2225
2226 static int __net_init tcpv6_net_init(struct net *net)
2227 {
2228         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2229                                     SOCK_RAW, IPPROTO_TCP, net);
2230 }
2231
2232 static void __net_exit tcpv6_net_exit(struct net *net)
2233 {
2234         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2235 }
2236
2237 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2238 {
2239         tcp_twsk_purge(net_exit_list, AF_INET6);
2240 }
2241
2242 static struct pernet_operations tcpv6_net_ops = {
2243         .init       = tcpv6_net_init,
2244         .exit       = tcpv6_net_exit,
2245         .exit_batch = tcpv6_net_exit_batch,
2246 };
2247
2248 int __init tcpv6_init(void)
2249 {
2250         int ret;
2251
2252         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2253         if (ret)
2254                 goto out;
2255
2256         /* register inet6 protocol */
2257         ret = inet6_register_protosw(&tcpv6_protosw);
2258         if (ret)
2259                 goto out_tcpv6_protocol;
2260
2261         ret = register_pernet_subsys(&tcpv6_net_ops);
2262         if (ret)
2263                 goto out_tcpv6_protosw;
2264
2265         ret = mptcpv6_init();
2266         if (ret)
2267                 goto out_tcpv6_pernet_subsys;
2268
2269 out:
2270         return ret;
2271
2272 out_tcpv6_pernet_subsys:
2273         unregister_pernet_subsys(&tcpv6_net_ops);
2274 out_tcpv6_protosw:
2275         inet6_unregister_protosw(&tcpv6_protosw);
2276 out_tcpv6_protocol:
2277         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2278         goto out;
2279 }
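/* Error-unwinding note (descriptive): the labels above tear registrations
 * down in strict reverse order of setup, so e.g. an mptcpv6_init() failure
 * first drops the pernet subsys, then the protosw, then the protocol
 * handler.
 */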
2280
2281 void tcpv6_exit(void)
2282 {
2283         unregister_pernet_subsys(&tcpv6_net_ops);
2284         inet6_unregister_protosw(&tcpv6_protosw);
2285         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2286 }