mptcp: ensure listener is unhashed before updating the sk status
[platform/kernel/linux-starfive.git] net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
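
/* Editorial note, not part of the original file: a minimal sketch of the
 * layout assumption behind tcp_inet6_sk(). It relies on ipv6_pinfo being
 * the trailing member of struct tcp6_sock, roughly:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;
 *	};
 *
 * so "(u8 *)sk + sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo)"
 * lands on &((struct tcp6_sock *)sk)->inet6 without dereferencing
 * inet_sk(sk)->pinet6.
 */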

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}
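
/* Editorial note: the dst cached here is validated before reuse; in
 * tcp_v6_do_rcv() below it is dropped again if skb->skb_iif no longer
 * matches sk_rx_dst_ifindex or if the rt6 cookie check fails.
 */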

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside the bounds specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6_txoptions *opt;
        struct dst_entry *dst;
        struct flowi6 fl6;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface. */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                        WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (!saddr) {
                saddr = &fl6.saddr;

                err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
                if (err)
                        goto failure;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        inet_bhash2_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}
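
/* Usage sketch (illustrative, userspace; not part of this file): the
 * v4-mapped branch above is taken when an AF_INET6 socket connects to a
 * ::ffff:a.b.c.d address, e.g.:
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * The socket is then handed to tcp_v4_connect() and switches to the
 * ipv6_mapped af_ops, unless IPV6_V6ONLY is set, in which case the
 * connect fails with -ENETUNREACH.
 */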

static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}
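
/* Worked example (illustrative): if mss_cache is 1400 and a PKT_TOOBIG
 * message quotes an MTU for which tcp_mtu_to_mss() returns >= 1400, the
 * message is ignored above, since it could only grow the current MSS.
 */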

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }


        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* check if this ICMP message allows revert of backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}
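
/* Editorial note: for ICMPV6_PKT_TOOBIG the handler above only records the
 * new MTU in tp->mtu_info when the socket is owned by the user; the actual
 * tcp_v6_mtu_reduced() work is then deferred through the
 * TCP_MTU_REDUCED_DEFERRED flag and runs once the socket lock is released
 * (see tcp_release_cb()).
 */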


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to check set/not set outside of rcu;
                 * right now the device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
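
/* Usage sketch (illustrative, userspace; example values only): installing
 * an MD5 key for an IPv6 peer on a TCP socket fd, matching the parsing
 * above:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * TCP_MD5SIG_EXT additionally honours tcpm_prefixlen and tcpm_ifindex when
 * the matching tcpm_flags bits (TCP_MD5SIG_FLAG_PREFIX,
 * TCP_MD5SIG_FLAG_IFINDEX) are set.
 */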

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}
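
/* For reference (derived from the code above): the buffer hashed here is
 * the RFC 2460 pseudo-header followed by the TCP header with a zeroed
 * checksum field, i.e. roughly:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr saddr;
 *		struct in6_addr daddr;
 *		__be32 len;        (TCP length)
 *		__be32 protocol;   (IPPROTO_TCP)
 *	};
 */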

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
                        mark = sk->sk_mark;
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
                /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
                skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup whether or not this is for a RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}
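
/* Layout note (derived from the code above): the reply's option area is
 * filled in order with the aligned timestamp option (NOP, NOP, TIMESTAMP,
 * tsval, tsecr) when tsecr is set, then the MPTCP reset option if any,
 * then the aligned MD5 option; t1->doff counts the TCP header plus these
 * options in 32-bit words.
 */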

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        u32 txhash = 0;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* If sdif is set, the packet ingressed via a device in an
                 * L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the MD5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked against the MD5 hash of the
                 * found key, and no RST is generated if the hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
                                            NULL, 0, &ipv6h->saddr, th->source,
                                            &ipv6h->daddr, ntohs(th->source),
                                            dif, sdif);
                if (!sk1)
                        goto out;

                /* If sdif is set, the packet ingressed via a device in an
                 * L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                        txhash = sk->sk_txhash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                        txhash = inet_twsk(sk)->tw_txhash;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority, u32 txhash)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
                        tw->tw_txhash);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
                        tcp_rsk(req)->txhash);
}
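
/* Worked example for the RFC 7323 shift above (illustrative): with
 * rcv_wscale = 7 and rsk_rcv_wnd = 65536, the advertised SEG.WND is
 * 65536 >> 7 = 512.
 */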


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* It is a tricky place. Until this moment the IPv4 tcp socket
                   worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever,
           but we do one more thing here: reattach optmem
           to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions)
                                tcp_v6_restore_cb(newnp->pktoptions);
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}
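
/* Editorial note (a reading of the code above, not authoritative):
 * *own_req is cleared when inet_ehash_nolisten() does not insert newsk,
 * e.g. because a socket for the same 4-tuple already exists. In the
 * syncookie case (no req_unhash) the duplicate newsk is dropped above;
 * otherwise newsk is returned as-is and the caller resolves the race.
 */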

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
                                                           u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct sk_buff *opt_skb = NULL;
        enum skb_drop_reason reason;
        struct tcp_sock *tp;

        /* Imagine: socket is IPv6. IPv4 packet arrives,
           goes to the IPv4 receive handler and is backlogged.
           From the backlog it always goes here. Kerboom...
           Fortunately, tcp_rcv_established and rcv_established
           handle them correctly, but it is not the case with
           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
         */

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);

        /*
         *      socket locking is here for SMP purposes as backlog rcv
         *      is currently called with bh processing disabled.
         */

1454         /* Do Stevens' IPV6_PKTOPTIONS.
1455
1456            Yes, guys, this is the only place in our code where we
1457            may handle it without affecting IPv4.
1458            The rest of the code is protocol independent,
1459            and I do not like the idea of uglifying IPv4.
1460
1461            Actually, the whole idea behind IPV6_PKTOPTIONS
1462            does not look very well thought out. For now we latch
1463            the options received in the last packet enqueued
1464            by tcp. Feel free to propose a better solution.
1465                                                --ANK (980728)
1466          */
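        /* From userspace the latched options surface via recvmsg() ancillary
         * data once the matching IPV6_RECV* options are enabled; a minimal
         * sketch (error handling omitted):
         *
         *	int on = 1;
         *	char cbuf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
         *	struct msghdr msg = { .msg_control = cbuf,
         *			      .msg_controllen = sizeof(cbuf) };
         *
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *	recvmsg(fd, &msg, 0);
         *
         * Each CMSG_FIRSTHDR()/CMSG_NXTHDR() entry with cmsg_level ==
         * IPPROTO_IPV6 and cmsg_type == IPV6_PKTINFO then carries a
         * struct in6_pktinfo in CMSG_DATA().
         */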
1467         if (np->rxopt.all)
1468                 opt_skb = skb_clone_and_charge_r(skb, sk);
1469
1470         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1471         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1472                 struct dst_entry *dst;
1473
1474                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1475                                                 lockdep_sock_is_held(sk));
1476
1477                 sock_rps_save_rxhash(sk, skb);
1478                 sk_mark_napi_id(sk, skb);
1479                 if (dst) {
1480                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1481                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1482                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1483                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1484                                 dst_release(dst);
1485                         }
1486                 }
1487
1488                 tcp_rcv_established(sk, skb);
1489                 if (opt_skb)
1490                         goto ipv6_pktoptions;
1491                 return 0;
1492         }
1493
1494         if (tcp_checksum_complete(skb))
1495                 goto csum_err;
1496
1497         if (sk->sk_state == TCP_LISTEN) {
1498                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1499
1500                 if (!nsk)
1501                         goto discard;
1502
1503                 if (nsk != sk) {
1504                         if (tcp_child_process(sk, nsk, skb))
1505                                 goto reset;
1506                         if (opt_skb)
1507                                 __kfree_skb(opt_skb);
1508                         return 0;
1509                 }
1510         } else
1511                 sock_rps_save_rxhash(sk, skb);
1512
1513         if (tcp_rcv_state_process(sk, skb))
1514                 goto reset;
1515         if (opt_skb)
1516                 goto ipv6_pktoptions;
1517         return 0;
1518
1519 reset:
1520         tcp_v6_send_reset(sk, skb);
1521 discard:
1522         if (opt_skb)
1523                 __kfree_skb(opt_skb);
1524         kfree_skb_reason(skb, reason);
1525         return 0;
1526 csum_err:
1527         reason = SKB_DROP_REASON_TCP_CSUM;
1528         trace_tcp_bad_csum(skb);
1529         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1530         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1531         goto discard;
1532
1533
1534 ipv6_pktoptions:
1535         /* You may ask, what is this for?
1536
1537            1. skb was enqueued by tcp.
1538            2. skb was added to the tail of the read queue, not out of order.
1539            3. The socket is not in a passive state.
1540            4. Finally, it really contains options the user wants to receive.
1541          */
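        /* In the code below, conditions (1) and (2) correspond to the
         * end_seq == rcv_nxt test (the packet is the in-order tail of what
         * tcp has consumed), (3) to the TCPF_CLOSE | TCPF_LISTEN state mask,
         * and (4) to the rxopt bits and the ipv6_opt_accepted() check.
         */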
1542         tp = tcp_sk(sk);
1543         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1544             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1545                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1546                         np->mcast_oif = tcp_v6_iif(opt_skb);
1547                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1548                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1549                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1550                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1551                 if (np->repflow)
1552                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1553                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1554                         tcp_v6_restore_cb(opt_skb);
1555                         opt_skb = xchg(&np->pktoptions, opt_skb);
1556                 } else {
1557                         __kfree_skb(opt_skb);
1558                         opt_skb = xchg(&np->pktoptions, NULL);
1559                 }
1560         }
1561
1562         consume_skb(opt_skb);
1563         return 0;
1564 }
1565
1566 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1567                            const struct tcphdr *th)
1568 {
1569         /* This is tricky: we move IP6CB to its correct location in
1570          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1571          * _decode_session6() uses IP6CB().
1572          * barrier() makes sure the compiler won't play aliasing games.
1573          */
1574         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1575                 sizeof(struct inet6_skb_parm));
1576         barrier();
1577
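        /* end_seq accounts for SYN and FIN, which each consume one sequence
         * number in addition to the payload bytes: e.g. a bare SYN yields
         * end_seq == seq + 1, while a 100-byte segment with FIN set yields
         * end_seq == seq + 101.
         */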
1578         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1579         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1580                                     skb->len - th->doff*4);
1581         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1582         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1583         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1584         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1585         TCP_SKB_CB(skb)->sacked = 0;
1586         TCP_SKB_CB(skb)->has_rxtstamp =
1587                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1588 }
1589
1590 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1591 {
1592         enum skb_drop_reason drop_reason;
1593         int sdif = inet6_sdif(skb);
1594         int dif = inet6_iif(skb);
1595         const struct tcphdr *th;
1596         const struct ipv6hdr *hdr;
1597         bool refcounted;
1598         struct sock *sk;
1599         int ret;
1600         struct net *net = dev_net(skb->dev);
1601
1602         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1603         if (skb->pkt_type != PACKET_HOST)
1604                 goto discard_it;
1605
1606         /*
1607          *      Count it even if it's bad.
1608          */
1609         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1610
1611         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1612                 goto discard_it;
1613
1614         th = (const struct tcphdr *)skb->data;
1615
1616         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1617                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1618                 goto bad_packet;
1619         }
1620         if (!pskb_may_pull(skb, th->doff*4))
1621                 goto discard_it;
1622
1623         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1624                 goto csum_error;
1625
1626         th = (const struct tcphdr *)skb->data;
1627         hdr = ipv6_hdr(skb);
1628
1629 lookup:
1630         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1631                                 th->source, th->dest, inet6_iif(skb), sdif,
1632                                 &refcounted);
1633         if (!sk)
1634                 goto no_tcp_socket;
1635
1636 process:
1637         if (sk->sk_state == TCP_TIME_WAIT)
1638                 goto do_time_wait;
1639
1640         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1641                 struct request_sock *req = inet_reqsk(sk);
1642                 bool req_stolen = false;
1643                 struct sock *nsk;
1644
1645                 sk = req->rsk_listener;
1646                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1647                                                    &hdr->saddr, &hdr->daddr,
1648                                                    AF_INET6, dif, sdif);
1649                 if (drop_reason) {
1650                         sk_drops_add(sk, skb);
1651                         reqsk_put(req);
1652                         goto discard_it;
1653                 }
1654                 if (tcp_checksum_complete(skb)) {
1655                         reqsk_put(req);
1656                         goto csum_error;
1657                 }
1658                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1659                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1660                         if (!nsk) {
1661                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1662                                 goto lookup;
1663                         }
1664                         sk = nsk;
1665                         /* reuseport_migrate_sock() has already taken one
1666                          * sk_refcnt reference before returning.
1667                          */
1668                 } else {
1669                         sock_hold(sk);
1670                 }
1671                 refcounted = true;
1672                 nsk = NULL;
1673                 if (!tcp_filter(sk, skb)) {
1674                         th = (const struct tcphdr *)skb->data;
1675                         hdr = ipv6_hdr(skb);
1676                         tcp_v6_fill_cb(skb, hdr, th);
1677                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1678                 } else {
1679                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1680                 }
1681                 if (!nsk) {
1682                         reqsk_put(req);
1683                         if (req_stolen) {
1684                                 /* Another cpu got exclusive access to req
1685                                  * and created a full-blown socket.
1686                                  * Try to feed this packet to that socket
1687                                  * instead of discarding it.
1688                                  */
1689                                 tcp_v6_restore_cb(skb);
1690                                 sock_put(sk);
1691                                 goto lookup;
1692                         }
1693                         goto discard_and_relse;
1694                 }
1695                 if (nsk == sk) {
1696                         reqsk_put(req);
1697                         tcp_v6_restore_cb(skb);
1698                 } else if (tcp_child_process(sk, nsk, skb)) {
1699                         tcp_v6_send_reset(nsk, skb);
1700                         goto discard_and_relse;
1701                 } else {
1702                         sock_put(sk);
1703                         return 0;
1704                 }
1705         }
1706
1707         if (static_branch_unlikely(&ip6_min_hopcount)) {
1708                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1709                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1710                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1711                         goto discard_and_relse;
1712                 }
1713         }
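        /* The threshold above is the IPv6 analogue of IP_MINTTL; userspace
         * would arm it with something like (illustrative value):
         *
         *	int min_hops = 64;
         *
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *		   &min_hops, sizeof(min_hops));
         *
         * so that segments arriving with a smaller hop limit are dropped,
         * in the spirit of RFC 5082 (GTSM).
         */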
1714
1715         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1716                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1717                 goto discard_and_relse;
1718         }
1719
1720         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1721                                            AF_INET6, dif, sdif);
1722         if (drop_reason)
1723                 goto discard_and_relse;
1724
1725         nf_reset_ct(skb);
1726
1727         if (tcp_filter(sk, skb)) {
1728                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1729                 goto discard_and_relse;
1730         }
1731         th = (const struct tcphdr *)skb->data;
1732         hdr = ipv6_hdr(skb);
1733         tcp_v6_fill_cb(skb, hdr, th);
1734
1735         skb->dev = NULL;
1736
1737         if (sk->sk_state == TCP_LISTEN) {
1738                 ret = tcp_v6_do_rcv(sk, skb);
1739                 goto put_and_return;
1740         }
1741
1742         sk_incoming_cpu_update(sk);
1743
1744         bh_lock_sock_nested(sk);
1745         tcp_segs_in(tcp_sk(sk), skb);
1746         ret = 0;
1747         if (!sock_owned_by_user(sk)) {
1748                 ret = tcp_v6_do_rcv(sk, skb);
1749         } else {
1750                 if (tcp_add_backlog(sk, skb, &drop_reason))
1751                         goto discard_and_relse;
1752         }
1753         bh_unlock_sock(sk);
1754 put_and_return:
1755         if (refcounted)
1756                 sock_put(sk);
1757         return ret ? -1 : 0;
1758
1759 no_tcp_socket:
1760         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1761         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1762                 goto discard_it;
1763
1764         tcp_v6_fill_cb(skb, hdr, th);
1765
1766         if (tcp_checksum_complete(skb)) {
1767 csum_error:
1768                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1769                 trace_tcp_bad_csum(skb);
1770                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1771 bad_packet:
1772                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1773         } else {
1774                 tcp_v6_send_reset(NULL, skb);
1775         }
1776
1777 discard_it:
1778         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1779         kfree_skb_reason(skb, drop_reason);
1780         return 0;
1781
1782 discard_and_relse:
1783         sk_drops_add(sk, skb);
1784         if (refcounted)
1785                 sock_put(sk);
1786         goto discard_it;
1787
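/* Timewait path: tcp_timewait_state_process() decides whether a segment that
 * hit a TIME_WAIT socket should be treated as a new connection attempt
 * (TCP_TW_SYN), answered with an ACK (TCP_TW_ACK), reset (TCP_TW_RST), or
 * silently consumed (TCP_TW_SUCCESS).
 */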
1788 do_time_wait:
1789         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1790                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1791                 inet_twsk_put(inet_twsk(sk));
1792                 goto discard_it;
1793         }
1794
1795         tcp_v6_fill_cb(skb, hdr, th);
1796
1797         if (tcp_checksum_complete(skb)) {
1798                 inet_twsk_put(inet_twsk(sk));
1799                 goto csum_error;
1800         }
1801
1802         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1803         case TCP_TW_SYN:
1804         {
1805                 struct sock *sk2;
1806
1807                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1808                                             skb, __tcp_hdrlen(th),
1809                                             &ipv6_hdr(skb)->saddr, th->source,
1810                                             &ipv6_hdr(skb)->daddr,
1811                                             ntohs(th->dest),
1812                                             tcp_v6_iif_l3_slave(skb),
1813                                             sdif);
1814                 if (sk2) {
1815                         struct inet_timewait_sock *tw = inet_twsk(sk);
1816                         inet_twsk_deschedule_put(tw);
1817                         sk = sk2;
1818                         tcp_v6_restore_cb(skb);
1819                         refcounted = false;
1820                         goto process;
1821                 }
1822         }
1823                 /* to ACK */
1824                 fallthrough;
1825         case TCP_TW_ACK:
1826                 tcp_v6_timewait_ack(sk, skb);
1827                 break;
1828         case TCP_TW_RST:
1829                 tcp_v6_send_reset(sk, skb);
1830                 inet_twsk_deschedule_put(inet_twsk(sk));
1831                 goto discard_it;
1832         case TCP_TW_SUCCESS:
1833                 ;
1834         }
1835         goto discard_it;
1836 }
1837
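/* Early demux runs from the IPv6 input path, before routing: if the segment
 * matches an established socket, that socket (and, when still valid, its
 * cached rx dst) is attached to the skb so the later socket and route
 * lookups can be skipped.
 */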
1838 void tcp_v6_early_demux(struct sk_buff *skb)
1839 {
1840         struct net *net = dev_net(skb->dev);
1841         const struct ipv6hdr *hdr;
1842         const struct tcphdr *th;
1843         struct sock *sk;
1844
1845         if (skb->pkt_type != PACKET_HOST)
1846                 return;
1847
1848         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1849                 return;
1850
1851         hdr = ipv6_hdr(skb);
1852         th = tcp_hdr(skb);
1853
1854         if (th->doff < sizeof(struct tcphdr) / 4)
1855                 return;
1856
1857         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1858         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1859                                         &hdr->saddr, th->source,
1860                                         &hdr->daddr, ntohs(th->dest),
1861                                         inet6_iif(skb), inet6_sdif(skb));
1862         if (sk) {
1863                 skb->sk = sk;
1864                 skb->destructor = sock_edemux;
1865                 if (sk_fullsock(sk)) {
1866                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1867
1868                         if (dst)
1869                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1870                         if (dst &&
1871                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1872                                 skb_dst_set_noref(skb, dst);
1873                 }
1874         }
1875 }
1876
1877 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1878         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1879         .twsk_unique    = tcp_twsk_unique,
1880         .twsk_destructor = tcp_twsk_destructor,
1881 };
1882
1883 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1884 {
1885         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1886 }
1887
1888 const struct inet_connection_sock_af_ops ipv6_specific = {
1889         .queue_xmit        = inet6_csk_xmit,
1890         .send_check        = tcp_v6_send_check,
1891         .rebuild_header    = inet6_sk_rebuild_header,
1892         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1893         .conn_request      = tcp_v6_conn_request,
1894         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1895         .net_header_len    = sizeof(struct ipv6hdr),
1896         .net_frag_header_len = sizeof(struct frag_hdr),
1897         .setsockopt        = ipv6_setsockopt,
1898         .getsockopt        = ipv6_getsockopt,
1899         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1900         .sockaddr_len      = sizeof(struct sockaddr_in6),
1901         .mtu_reduced       = tcp_v6_mtu_reduced,
1902 };
1903
1904 #ifdef CONFIG_TCP_MD5SIG
1905 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1906         .md5_lookup     =       tcp_v6_md5_lookup,
1907         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1908         .md5_parse      =       tcp_v6_parse_md5_keys,
1909 };
1910 #endif
1911
1912 /*
1913  *      TCP over IPv4 via INET6 API
1914  */
1915 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1916         .queue_xmit        = ip_queue_xmit,
1917         .send_check        = tcp_v4_send_check,
1918         .rebuild_header    = inet_sk_rebuild_header,
1919         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1920         .conn_request      = tcp_v6_conn_request,
1921         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1922         .net_header_len    = sizeof(struct iphdr),
1923         .setsockopt        = ipv6_setsockopt,
1924         .getsockopt        = ipv6_getsockopt,
1925         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1926         .sockaddr_len      = sizeof(struct sockaddr_in6),
1927         .mtu_reduced       = tcp_v4_mtu_reduced,
1928 };
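/* A sketch of how these two ops tables are used: an AF_INET6 socket starts
 * out with ipv6_specific (see tcp_v6_init_sock() below); when it connects to
 * a v4-mapped address (::ffff:a.b.c.d), tcp_v6_connect() switches
 * icsk->icsk_af_ops to ipv6_mapped so that transmission goes through the
 * IPv4 stack while the socket keeps its IPv6 API.
 */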
1929
1930 #ifdef CONFIG_TCP_MD5SIG
1931 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1932         .md5_lookup     =       tcp_v4_md5_lookup,
1933         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1934         .md5_parse      =       tcp_v6_parse_md5_keys,
1935 };
1936 #endif
1937
1938 /* NOTE: A lot of things are set to zero explicitly by the call to
1939  *       sk_alloc(), so they need not be done here.
1940  */
1941 static int tcp_v6_init_sock(struct sock *sk)
1942 {
1943         struct inet_connection_sock *icsk = inet_csk(sk);
1944
1945         tcp_init_sock(sk);
1946
1947         icsk->icsk_af_ops = &ipv6_specific;
1948
1949 #ifdef CONFIG_TCP_MD5SIG
1950         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1951 #endif
1952
1953         return 0;
1954 }
1955
1956 #ifdef CONFIG_PROC_FS
1957 /* Proc filesystem TCPv6 sock list dumping. */
1958 static void get_openreq6(struct seq_file *seq,
1959                          const struct request_sock *req, int i)
1960 {
1961         long ttd = req->rsk_timer.expires - jiffies;
1962         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1963         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1964
1965         if (ttd < 0)
1966                 ttd = 0;
1967
1968         seq_printf(seq,
1969                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1970                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1971                    i,
1972                    src->s6_addr32[0], src->s6_addr32[1],
1973                    src->s6_addr32[2], src->s6_addr32[3],
1974                    inet_rsk(req)->ir_num,
1975                    dest->s6_addr32[0], dest->s6_addr32[1],
1976                    dest->s6_addr32[2], dest->s6_addr32[3],
1977                    ntohs(inet_rsk(req)->ir_rmt_port),
1978                    TCP_SYN_RECV,
1979                    0, 0, /* could print option size, but that is af dependent. */
1980                    1,   /* timers active (only the expire timer) */
1981                    jiffies_to_clock_t(ttd),
1982                    req->num_timeout,
1983                    from_kuid_munged(seq_user_ns(seq),
1984                                     sock_i_uid(req->rsk_listener)),
1985                    0,  /* non-standard timer */
1986                    0, /* open_requests have no inode */
1987                    0, req);
1988 }
1989
1990 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1991 {
1992         const struct in6_addr *dest, *src;
1993         __u16 destp, srcp;
1994         int timer_active;
1995         unsigned long timer_expires;
1996         const struct inet_sock *inet = inet_sk(sp);
1997         const struct tcp_sock *tp = tcp_sk(sp);
1998         const struct inet_connection_sock *icsk = inet_csk(sp);
1999         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2000         int rx_queue;
2001         int state;
2002
2003         dest  = &sp->sk_v6_daddr;
2004         src   = &sp->sk_v6_rcv_saddr;
2005         destp = ntohs(inet->inet_dport);
2006         srcp  = ntohs(inet->inet_sport);
2007
2008         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2009             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2010             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2011                 timer_active    = 1;
2012                 timer_expires   = icsk->icsk_timeout;
2013         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2014                 timer_active    = 4;
2015                 timer_expires   = icsk->icsk_timeout;
2016         } else if (timer_pending(&sp->sk_timer)) {
2017                 timer_active    = 2;
2018                 timer_expires   = sp->sk_timer.expires;
2019         } else {
2020                 timer_active    = 0;
2021                 timer_expires = jiffies;
2022         }
2023
2024         state = inet_sk_state_load(sp);
2025         if (state == TCP_LISTEN)
2026                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2027         else
2028                 /* Because we don't lock the socket,
2029                  * we might find a transient negative value.
2030                  */
2031                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2032                                       READ_ONCE(tp->copied_seq), 0);
2033
2034         seq_printf(seq,
2035                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2036                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2037                    i,
2038                    src->s6_addr32[0], src->s6_addr32[1],
2039                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2040                    dest->s6_addr32[0], dest->s6_addr32[1],
2041                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2042                    state,
2043                    READ_ONCE(tp->write_seq) - tp->snd_una,
2044                    rx_queue,
2045                    timer_active,
2046                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2047                    icsk->icsk_retransmits,
2048                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2049                    icsk->icsk_probes_out,
2050                    sock_i_ino(sp),
2051                    refcount_read(&sp->sk_refcnt), sp,
2052                    jiffies_to_clock_t(icsk->icsk_rto),
2053                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2054                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2055                    tcp_snd_cwnd(tp),
2056                    state == TCP_LISTEN ?
2057                         fastopenq->max_qlen :
2058                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2059                    );
2060 }
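/* A resulting /proc/net/tcp6 row for a listener on [::]:8080 would look
 * roughly like this, with illustrative uid/inode/pointer values and the
 * tail columns elided:
 *
 *	0: 00000000000000000000000000000000:1F90
 *	   00000000000000000000000000000000:0000 0A 00000000:00000000
 *	   00:00000000 00000000  1000        0 12345 1 0000000000000000 ...
 *
 * where 0A is TCP_LISTEN and 1F90 is port 8080 in hex.
 */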
2061
2062 static void get_timewait6_sock(struct seq_file *seq,
2063                                struct inet_timewait_sock *tw, int i)
2064 {
2065         long delta = tw->tw_timer.expires - jiffies;
2066         const struct in6_addr *dest, *src;
2067         __u16 destp, srcp;
2068
2069         dest = &tw->tw_v6_daddr;
2070         src  = &tw->tw_v6_rcv_saddr;
2071         destp = ntohs(tw->tw_dport);
2072         srcp  = ntohs(tw->tw_sport);
2073
2074         seq_printf(seq,
2075                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2076                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2077                    i,
2078                    src->s6_addr32[0], src->s6_addr32[1],
2079                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2080                    dest->s6_addr32[0], dest->s6_addr32[1],
2081                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2082                    tw->tw_substate, 0, 0,
2083                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2084                    refcount_read(&tw->tw_refcnt), tw);
2085 }
2086
2087 static int tcp6_seq_show(struct seq_file *seq, void *v)
2088 {
2089         struct tcp_iter_state *st;
2090         struct sock *sk = v;
2091
2092         if (v == SEQ_START_TOKEN) {
2093                 seq_puts(seq,
2094                          "  sl  "
2095                          "local_address                         "
2096                          "remote_address                        "
2097                          "st tx_queue rx_queue tr tm->when retrnsmt"
2098                          "   uid  timeout inode\n");
2099                 goto out;
2100         }
2101         st = seq->private;
2102
2103         if (sk->sk_state == TCP_TIME_WAIT)
2104                 get_timewait6_sock(seq, v, st->num);
2105         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2106                 get_openreq6(seq, v, st->num);
2107         else
2108                 get_tcp6_sock(seq, v, st->num);
2109 out:
2110         return 0;
2111 }
2112
2113 static const struct seq_operations tcp6_seq_ops = {
2114         .show           = tcp6_seq_show,
2115         .start          = tcp_seq_start,
2116         .next           = tcp_seq_next,
2117         .stop           = tcp_seq_stop,
2118 };
2119
2120 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2121         .family         = AF_INET6,
2122 };
2123
2124 int __net_init tcp6_proc_init(struct net *net)
2125 {
2126         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2127                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2128                 return -ENOMEM;
2129         return 0;
2130 }
2131
2132 void tcp6_proc_exit(struct net *net)
2133 {
2134         remove_proc_entry("tcp6", net->proc_net);
2135 }
2136 #endif
2137
2138 struct proto tcpv6_prot = {
2139         .name                   = "TCPv6",
2140         .owner                  = THIS_MODULE,
2141         .close                  = tcp_close,
2142         .pre_connect            = tcp_v6_pre_connect,
2143         .connect                = tcp_v6_connect,
2144         .disconnect             = tcp_disconnect,
2145         .accept                 = inet_csk_accept,
2146         .ioctl                  = tcp_ioctl,
2147         .init                   = tcp_v6_init_sock,
2148         .destroy                = tcp_v4_destroy_sock,
2149         .shutdown               = tcp_shutdown,
2150         .setsockopt             = tcp_setsockopt,
2151         .getsockopt             = tcp_getsockopt,
2152         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2153         .keepalive              = tcp_set_keepalive,
2154         .recvmsg                = tcp_recvmsg,
2155         .sendmsg                = tcp_sendmsg,
2156         .sendpage               = tcp_sendpage,
2157         .backlog_rcv            = tcp_v6_do_rcv,
2158         .release_cb             = tcp_release_cb,
2159         .hash                   = inet6_hash,
2160         .unhash                 = inet_unhash,
2161         .get_port               = inet_csk_get_port,
2162         .put_port               = inet_put_port,
2163 #ifdef CONFIG_BPF_SYSCALL
2164         .psock_update_sk_prot   = tcp_bpf_update_proto,
2165 #endif
2166         .enter_memory_pressure  = tcp_enter_memory_pressure,
2167         .leave_memory_pressure  = tcp_leave_memory_pressure,
2168         .stream_memory_free     = tcp_stream_memory_free,
2169         .sockets_allocated      = &tcp_sockets_allocated,
2170
2171         .memory_allocated       = &tcp_memory_allocated,
2172         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2173
2174         .memory_pressure        = &tcp_memory_pressure,
2175         .orphan_count           = &tcp_orphan_count,
2176         .sysctl_mem             = sysctl_tcp_mem,
2177         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2178         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2179         .max_header             = MAX_TCP_HEADER,
2180         .obj_size               = sizeof(struct tcp6_sock),
2181         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2182         .twsk_prot              = &tcp6_timewait_sock_ops,
2183         .rsk_prot               = &tcp6_request_sock_ops,
2184         .h.hashinfo             = NULL,
2185         .no_autobind            = true,
2186         .diag_destroy           = tcp_abort,
2187 };
2188 EXPORT_SYMBOL_GPL(tcpv6_prot);
2189
2190 static const struct inet6_protocol tcpv6_protocol = {
2191         .handler        =       tcp_v6_rcv,
2192         .err_handler    =       tcp_v6_err,
2193         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2194 };
2195
2196 static struct inet_protosw tcpv6_protosw = {
2197         .type           =       SOCK_STREAM,
2198         .protocol       =       IPPROTO_TCP,
2199         .prot           =       &tcpv6_prot,
2200         .ops            =       &inet6_stream_ops,
2201         .flags          =       INET_PROTOSW_PERMANENT |
2202                                 INET_PROTOSW_ICSK,
2203 };
2204
2205 static int __net_init tcpv6_net_init(struct net *net)
2206 {
2207         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2208                                     SOCK_RAW, IPPROTO_TCP, net);
2209 }
2210
2211 static void __net_exit tcpv6_net_exit(struct net *net)
2212 {
2213         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2214 }
2215
2216 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2217 {
2218         tcp_twsk_purge(net_exit_list, AF_INET6);
2219 }
2220
2221 static struct pernet_operations tcpv6_net_ops = {
2222         .init       = tcpv6_net_init,
2223         .exit       = tcpv6_net_exit,
2224         .exit_batch = tcpv6_net_exit_batch,
2225 };
2226
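/* Registration order below is protocol handler -> protosw -> pernet ops ->
 * MPTCP; on failure each step unwinds the previous ones in reverse via the
 * goto ladder, so a partial init never leaves anything registered.
 */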
2227 int __init tcpv6_init(void)
2228 {
2229         int ret;
2230
2231         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2232         if (ret)
2233                 goto out;
2234
2235         /* register inet6 protocol */
2236         ret = inet6_register_protosw(&tcpv6_protosw);
2237         if (ret)
2238                 goto out_tcpv6_protocol;
2239
2240         ret = register_pernet_subsys(&tcpv6_net_ops);
2241         if (ret)
2242                 goto out_tcpv6_protosw;
2243
2244         ret = mptcpv6_init();
2245         if (ret)
2246                 goto out_tcpv6_pernet_subsys;
2247
2248 out:
2249         return ret;
2250
2251 out_tcpv6_pernet_subsys:
2252         unregister_pernet_subsys(&tcpv6_net_ops);
2253 out_tcpv6_protosw:
2254         inet6_unregister_protosw(&tcpv6_protosw);
2255 out_tcpv6_protocol:
2256         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2257         goto out;
2258 }
2259
2260 void tcpv6_exit(void)
2261 {
2262         unregister_pernet_subsys(&tcpv6_net_ops);
2263         inet6_unregister_protosw(&tcpv6_protosw);
2264         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2265 }