tcp: add accessors to read/set tp->snd_cwnd
[platform/kernel/linux-rpi.git] / net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
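/* Illustrative sketch (not part of this file): the offset arithmetic
 * above relies on struct tcp6_sock keeping its ipv6_pinfo as the last
 * member, i.e. a layout equivalent to:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;	// last member
 *	};
 *
 * so "sizeof(tcp6_sock) - sizeof(ipv6_pinfo)" is the constant offset of
 * inet6, and the compiler can fold the lookup into a fixed offset
 * instead of chasing the inet_sk(sk)->pinet6 pointer.
 */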
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
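/* Background on the two helpers above (hedged summary): both derive
 * per-connection values from keyed hashes of the addresses (plus, for
 * the ISN, the ports). tcp_v6_init_seq() yields an RFC 6528-style
 * initial sequence number; tcp_v6_init_ts_off() yields a
 * per-destination timestamp offset so TSval does not expose a global
 * clock across connections.
 */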
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent the BPF program called below from accessing bytes that are
135          * out of the bounds specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
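/* A minimal sketch of the bound enforced above (illustrative, not part
 * of this file): SIN6_LEN_RFC2133 is 24 bytes and covers sockaddr_in6
 * up to and including sin6_addr, so a caller may legitimately pass a
 * shorter structure that omits the trailing sin6_scope_id:
 *
 *	struct sockaddr_in6 sin6 = {
 *		.sin6_family = AF_INET6,
 *		.sin6_port   = htons(443),	// hypothetical port
 *	};
 *	inet_pton(AF_INET6, "2001:db8::1", &sin6.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sin6, SIN6_LEN_RFC2133);
 */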
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152         struct tcp_sock *tp = tcp_sk(sk);
153         struct in6_addr *saddr = NULL, *final_p, final;
154         struct ipv6_txoptions *opt;
155         struct flowi6 fl6;
156         struct dst_entry *dst;
157         int addr_type;
158         int err;
159         struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
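/* Illustrative userspace view of the BSD'ism above: connecting to the
 * unspecified address is silently redirected to loopback, so
 *
 *	sin6.sin6_addr = in6addr_any;			// "::"
 *	connect(fd, (struct sockaddr *)&sin6, sizeof(sin6));
 *
 * actually reaches ::1, while a socket bound to a v4-mapped address is
 * pointed at ::ffff:127.0.0.1 instead.
 */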
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type & IPV6_ADDR_LINKLOCAL) {
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If an interface was set while binding, the
202                          * indices must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connecting to a link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
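/* Hedged sketch of the rule above: a link-local destination needs an
 * explicit egress interface, supplied either by binding the socket to a
 * device beforehand or via sin6_scope_id:
 *
 *	sin6.sin6_scope_id = if_nametoindex("eth0");	// hypothetical name
 *	connect(fd, (struct sockaddr *)&sin6, sizeof(sin6));
 *
 * With neither set, the connect() fails with -EINVAL as coded above.
 */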
214
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (__ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
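/* Sketch of the v4-mapped fallback handled above (illustrative): a
 * destination such as ::ffff:192.0.2.1 turns the connection into plain
 * TCP over IPv4, handing off to tcp_v4_connect() with the embedded
 * address:
 *
 *	sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; // 192.0.2.1
 *
 * unless the socket is IPV6_V6ONLY, in which case -ENETUNREACH is
 * returned instead.
 */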
265
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         if (!saddr) {
290                 saddr = &fl6.saddr;
291                 sk->sk_v6_rcv_saddr = *saddr;
292         }
293
294         /* set the source address */
295         np->saddr = *saddr;
296         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297
298         sk->sk_gso_type = SKB_GSO_TCPV6;
299         ip6_dst_store(sk, dst, NULL, NULL);
300
301         icsk->icsk_ext_hdr_len = 0;
302         if (opt)
303                 icsk->icsk_ext_hdr_len = opt->opt_flen +
304                                          opt->opt_nflen;
305
306         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307
308         inet->inet_dport = usin->sin6_port;
309
310         tcp_set_state(sk, TCP_SYN_SENT);
311         err = inet6_hash_connect(tcp_death_row, sk);
312         if (err)
313                 goto late_failure;
314
315         sk_set_txhash(sk);
316
317         if (likely(!tp->repair)) {
318                 if (!tp->write_seq)
319                         WRITE_ONCE(tp->write_seq,
320                                    secure_tcpv6_seq(np->saddr.s6_addr32,
321                                                     sk->sk_v6_daddr.s6_addr32,
322                                                     inet->inet_sport,
323                                                     inet->inet_dport));
324                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
325                                                    np->saddr.s6_addr32,
326                                                    sk->sk_v6_daddr.s6_addr32);
327         }
328
329         if (tcp_fastopen_defer_connect(sk, &err))
330                 return err;
331         if (err)
332                 goto late_failure;
333
334         err = tcp_connect(sk);
335         if (err)
336                 goto late_failure;
337
338         return 0;
339
340 late_failure:
341         tcp_set_state(sk, TCP_CLOSE);
342 failure:
343         inet->inet_dport = 0;
344         sk->sk_route_caps = 0;
345         return err;
346 }
347
348 static void tcp_v6_mtu_reduced(struct sock *sk)
349 {
350         struct dst_entry *dst;
351         u32 mtu;
352
353         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
354                 return;
355
356         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
357
358         /* Drop requests trying to increase our current mss.
359          * The check done in __ip6_rt_update_pmtu() is too late.
360          */
361         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
362                 return;
363
364         dst = inet6_csk_update_pmtu(sk, mtu);
365         if (!dst)
366                 return;
367
368         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
369                 tcp_sync_mss(sk, dst_mtu(dst));
370                 tcp_simple_retransmit(sk);
371         }
372 }
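/* A note on the state test at the top of tcp_v6_mtu_reduced(): the
 * TCPF_* constants are bit masks defined as (1 << TCP_*), so with
 * sk_state == TCP_LISTEN (10), 1 << 10 == TCPF_LISTEN and the PMTU
 * update is ignored; any state outside {LISTEN, CLOSE} yields zero and
 * processing continues.
 */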
373
374 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
375                 u8 type, u8 code, int offset, __be32 info)
376 {
377         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
378         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
379         struct net *net = dev_net(skb->dev);
380         struct request_sock *fastopen;
381         struct ipv6_pinfo *np;
382         struct tcp_sock *tp;
383         __u32 seq, snd_una;
384         struct sock *sk;
385         bool fatal;
386         int err;
387
388         sk = __inet6_lookup_established(net, &tcp_hashinfo,
389                                         &hdr->daddr, th->dest,
390                                         &hdr->saddr, ntohs(th->source),
391                                         skb->dev->ifindex, inet6_sdif(skb));
392
393         if (!sk) {
394                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
395                                   ICMP6_MIB_INERRORS);
396                 return -ENOENT;
397         }
398
399         if (sk->sk_state == TCP_TIME_WAIT) {
400                 inet_twsk_put(inet_twsk(sk));
401                 return 0;
402         }
403         seq = ntohl(th->seq);
404         fatal = icmpv6_err_convert(type, code, &err);
405         if (sk->sk_state == TCP_NEW_SYN_RECV) {
406                 tcp_req_err(sk, seq, fatal);
407                 return 0;
408         }
409
410         bh_lock_sock(sk);
411         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
412                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
413
414         if (sk->sk_state == TCP_CLOSE)
415                 goto out;
416
417         if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
418                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
419                 goto out;
420         }
421
422         tp = tcp_sk(sk);
423         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
424         fastopen = rcu_dereference(tp->fastopen_rsk);
425         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
426         if (sk->sk_state != TCP_LISTEN &&
427             !between(seq, snd_una, tp->snd_nxt)) {
428                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
429                 goto out;
430         }
431
432         np = tcp_inet6_sk(sk);
433
434         if (type == NDISC_REDIRECT) {
435                 if (!sock_owned_by_user(sk)) {
436                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
437
438                         if (dst)
439                                 dst->ops->redirect(dst, sk, skb);
440                 }
441                 goto out;
442         }
443
444         if (type == ICMPV6_PKT_TOOBIG) {
445                 u32 mtu = ntohl(info);
446
447                 /* We are not interested in TCP_LISTEN and open_requests
448                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
449                  * they should go through unfragmented).
450                  */
451                 if (sk->sk_state == TCP_LISTEN)
452                         goto out;
453
454                 if (!ip6_sk_accept_pmtu(sk))
455                         goto out;
456
457                 if (mtu < IPV6_MIN_MTU)
458                         goto out;
459
460                 WRITE_ONCE(tp->mtu_info, mtu);
461
462                 if (!sock_owned_by_user(sk))
463                         tcp_v6_mtu_reduced(sk);
464                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
465                                            &sk->sk_tsq_flags))
466                         sock_hold(sk);
467                 goto out;
468         }
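                /* Deferral sketch for the branch above: when the socket is
                 * owned by user context, the new mtu has already been
                 * published via WRITE_ONCE(tp->mtu_info, mtu), so setting
                 * TCP_MTU_REDUCED_DEFERRED makes tcp_release_cb() invoke
                 * tcp_v6_mtu_reduced() once the lock is released, and
                 * sock_hold() keeps the socket alive until then.
                 */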
469
470
471         /* Might be for a request_sock */
472         switch (sk->sk_state) {
473         case TCP_SYN_SENT:
474         case TCP_SYN_RECV:
475                 /* Only in fast or simultaneous open. If a fast open socket is
476                  * already accepted it is treated as a connected one below.
477                  */
478                 if (fastopen && !fastopen->sk)
479                         break;
480
481                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
482
483                 if (!sock_owned_by_user(sk)) {
484                         sk->sk_err = err;
485                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
486
487                         tcp_done(sk);
488                 } else
489                         sk->sk_err_soft = err;
490                 goto out;
491         case TCP_LISTEN:
492                 break;
493         default:
494                 /* Check whether this ICMP message allows reverting the
495                  * RTO backoff (see RFC 6069).
496                  */
497                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
498                     code == ICMPV6_NOROUTE)
499                         tcp_ld_RTO_revert(sk, seq);
500         }
501
502         if (!sock_owned_by_user(sk) && np->recverr) {
503                 sk->sk_err = err;
504                 sk_error_report(sk);
505         } else
506                 sk->sk_err_soft = err;
507
508 out:
509         bh_unlock_sock(sk);
510         sock_put(sk);
511         return 0;
512 }
513
514
515 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
516                               struct flowi *fl,
517                               struct request_sock *req,
518                               struct tcp_fastopen_cookie *foc,
519                               enum tcp_synack_type synack_type,
520                               struct sk_buff *syn_skb)
521 {
522         struct inet_request_sock *ireq = inet_rsk(req);
523         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
524         struct ipv6_txoptions *opt;
525         struct flowi6 *fl6 = &fl->u.ip6;
526         struct sk_buff *skb;
527         int err = -ENOMEM;
528         u8 tclass;
529
530         /* First, grab a route. */
531         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
532                                                IPPROTO_TCP)) == NULL)
533                 goto done;
534
535         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
536
537         if (skb) {
538                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
539                                     &ireq->ir_v6_rmt_addr);
540
541                 fl6->daddr = ireq->ir_v6_rmt_addr;
542                 if (np->repflow && ireq->pktopts)
543                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
544
545                 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
546                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
547                                 (np->tclass & INET_ECN_MASK) :
548                                 np->tclass;
549
550                 if (!INET_ECN_is_capable(tclass) &&
551                     tcp_bpf_ca_needs_ecn((struct sock *)req))
552                         tclass |= INET_ECN_ECT_0;
553
554                 rcu_read_lock();
555                 opt = ireq->ipv6_opt;
556                 if (!opt)
557                         opt = rcu_dereference(np->opt);
558                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
559                                tclass, sk->sk_priority);
560                 rcu_read_unlock();
561                 err = net_xmit_eval(err);
562         }
563
564 done:
565         return err;
566 }
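/* Worked example of the tclass computation above, assuming
 * sysctl_tcp_reflect_tos is enabled: with a SYN carrying TOS 0x28
 * (ECN bits clear) and np->tclass == 0x02 (ECT(0)):
 *
 *	(0x28 & ~INET_ECN_MASK) | (0x02 & INET_ECN_MASK) == 0x2a
 *
 * i.e. the DSCP bits are reflected from the peer while the ECN bits
 * are kept from the listening socket.
 */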
567
568
569 static void tcp_v6_reqsk_destructor(struct request_sock *req)
570 {
571         kfree(inet_rsk(req)->ipv6_opt);
572         kfree_skb(inet_rsk(req)->pktopts);
573 }
574
575 #ifdef CONFIG_TCP_MD5SIG
576 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
577                                                    const struct in6_addr *addr,
578                                                    int l3index)
579 {
580         return tcp_md5_do_lookup(sk, l3index,
581                                  (union tcp_md5_addr *)addr, AF_INET6);
582 }
583
584 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
585                                                 const struct sock *addr_sk)
586 {
587         int l3index;
588
589         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
590                                                  addr_sk->sk_bound_dev_if);
591         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
592                                     l3index);
593 }
594
595 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
596                                  sockptr_t optval, int optlen)
597 {
598         struct tcp_md5sig cmd;
599         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
600         int l3index = 0;
601         u8 prefixlen;
602         u8 flags;
603
604         if (optlen < sizeof(cmd))
605                 return -EINVAL;
606
607         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
608                 return -EFAULT;
609
610         if (sin6->sin6_family != AF_INET6)
611                 return -EINVAL;
612
613         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
614
615         if (optname == TCP_MD5SIG_EXT &&
616             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
617                 prefixlen = cmd.tcpm_prefixlen;
618                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
619                                         prefixlen > 32))
620                         return -EINVAL;
621         } else {
622                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
623         }
624
625         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
626             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
627                 struct net_device *dev;
628
629                 rcu_read_lock();
630                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
631                 if (dev && netif_is_l3_master(dev))
632                         l3index = dev->ifindex;
633                 rcu_read_unlock();
634
635                 /* It is ok to check set/not set outside of rcu;
636                  * right now the device MUST be an L3 master.
637                  */
638                 if (!dev || !l3index)
639                         return -EINVAL;
640         }
641
642         if (!cmd.tcpm_keylen) {
643                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
644                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
645                                               AF_INET, prefixlen,
646                                               l3index, flags);
647                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
648                                       AF_INET6, prefixlen, l3index, flags);
649         }
650
651         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
652                 return -EINVAL;
653
654         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
655                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
656                                       AF_INET, prefixlen, l3index, flags,
657                                       cmd.tcpm_key, cmd.tcpm_keylen,
658                                       GFP_KERNEL);
659
660         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
661                               AF_INET6, prefixlen, l3index, flags,
662                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
663 }
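/* Hedged userspace sketch of the option parsed above: installing an MD5
 * key for a peer is a plain setsockopt() with struct tcp_md5sig
 * (TCP_MD5SIG_EXT additionally honours tcpm_prefixlen / tcpm_ifindex):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	memcpy(md5.tcpm_key, "secret", 6);		// hypothetical key
 *	memcpy(&md5.tcpm_addr, &peer_sin6, sizeof(peer_sin6));
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key for that address instead.
 */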
664
665 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
666                                    const struct in6_addr *daddr,
667                                    const struct in6_addr *saddr,
668                                    const struct tcphdr *th, int nbytes)
669 {
670         struct tcp6_pseudohdr *bp;
671         struct scatterlist sg;
672         struct tcphdr *_th;
673
674         bp = hp->scratch;
675         /* 1. TCP pseudo-header (RFC2460) */
676         bp->saddr = *saddr;
677         bp->daddr = *daddr;
678         bp->protocol = cpu_to_be32(IPPROTO_TCP);
679         bp->len = cpu_to_be32(nbytes);
680
681         _th = (struct tcphdr *)(bp + 1);
682         memcpy(_th, th, sizeof(*th));
683         _th->check = 0;
684
685         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
686         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
687                                 sizeof(*bp) + sizeof(*th));
688         return crypto_ahash_update(hp->md5_req);
689 }
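/* What the helper above feeds into MD5 (RFC 2385 over the IPv6
 * pseudo-header of RFC 2460): saddr, daddr, the zero-extended protocol
 * number, the segment length, then the TCP header with its checksum
 * field zeroed. The payload, when a full segment is signed, is hashed
 * separately by tcp_md5_hash_skb_data().
 */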
690
691 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
692                                const struct in6_addr *daddr, struct in6_addr *saddr,
693                                const struct tcphdr *th)
694 {
695         struct tcp_md5sig_pool *hp;
696         struct ahash_request *req;
697
698         hp = tcp_get_md5sig_pool();
699         if (!hp)
700                 goto clear_hash_noput;
701         req = hp->md5_req;
702
703         if (crypto_ahash_init(req))
704                 goto clear_hash;
705         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
706                 goto clear_hash;
707         if (tcp_md5_hash_key(hp, key))
708                 goto clear_hash;
709         ahash_request_set_crypt(req, NULL, md5_hash, 0);
710         if (crypto_ahash_final(req))
711                 goto clear_hash;
712
713         tcp_put_md5sig_pool();
714         return 0;
715
716 clear_hash:
717         tcp_put_md5sig_pool();
718 clear_hash_noput:
719         memset(md5_hash, 0, 16);
720         return 1;
721 }
722
723 static int tcp_v6_md5_hash_skb(char *md5_hash,
724                                const struct tcp_md5sig_key *key,
725                                const struct sock *sk,
726                                const struct sk_buff *skb)
727 {
728         const struct in6_addr *saddr, *daddr;
729         struct tcp_md5sig_pool *hp;
730         struct ahash_request *req;
731         const struct tcphdr *th = tcp_hdr(skb);
732
733         if (sk) { /* valid for establish/request sockets */
734                 saddr = &sk->sk_v6_rcv_saddr;
735                 daddr = &sk->sk_v6_daddr;
736         } else {
737                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
738                 saddr = &ip6h->saddr;
739                 daddr = &ip6h->daddr;
740         }
741
742         hp = tcp_get_md5sig_pool();
743         if (!hp)
744                 goto clear_hash_noput;
745         req = hp->md5_req;
746
747         if (crypto_ahash_init(req))
748                 goto clear_hash;
749
750         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
751                 goto clear_hash;
752         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
753                 goto clear_hash;
754         if (tcp_md5_hash_key(hp, key))
755                 goto clear_hash;
756         ahash_request_set_crypt(req, NULL, md5_hash, 0);
757         if (crypto_ahash_final(req))
758                 goto clear_hash;
759
760         tcp_put_md5sig_pool();
761         return 0;
762
763 clear_hash:
764         tcp_put_md5sig_pool();
765 clear_hash_noput:
766         memset(md5_hash, 0, 16);
767         return 1;
768 }
769
770 #endif
771
772 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
773                                     const struct sk_buff *skb,
774                                     int dif, int sdif)
775 {
776 #ifdef CONFIG_TCP_MD5SIG
777         const __u8 *hash_location = NULL;
778         struct tcp_md5sig_key *hash_expected;
779         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
780         const struct tcphdr *th = tcp_hdr(skb);
781         int genhash, l3index;
782         u8 newhash[16];
783
784         /* If sdif is set, the packet ingressed via a device
785          * in an L3 domain and dif has been set to the l3mdev.
786          */
787         l3index = sdif ? dif : 0;
788
789         hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
790         hash_location = tcp_parse_md5sig_option(th);
791
792         /* We've parsed the options - do we have a hash? */
793         if (!hash_expected && !hash_location)
794                 return false;
795
796         if (hash_expected && !hash_location) {
797                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
798                 return true;
799         }
800
801         if (!hash_expected && hash_location) {
802                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
803                 return true;
804         }
805
806         /* check the signature */
807         genhash = tcp_v6_md5_hash_skb(newhash,
808                                       hash_expected,
809                                       NULL, skb);
810
811         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
812                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
813                 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
814                                      genhash ? "failed" : "mismatch",
815                                      &ip6h->saddr, ntohs(th->source),
816                                      &ip6h->daddr, ntohs(th->dest), l3index);
817                 return true;
818         }
819 #endif
820         return false;
821 }
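/* Decision sketch for the checks above (a "true" return means drop):
 *
 *	expected key	option in segment	result
 *	no		no			accept, MD5 not in use
 *	yes		no			drop, TCPMD5NOTFOUND
 *	no		yes			drop, TCPMD5UNEXPECTED
 *	yes		yes			recompute, drop on mismatch
 */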
822
823 static void tcp_v6_init_req(struct request_sock *req,
824                             const struct sock *sk_listener,
825                             struct sk_buff *skb)
826 {
827         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
828         struct inet_request_sock *ireq = inet_rsk(req);
829         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
830
831         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
832         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
833
834         /* So that link locals have meaning */
835         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
836             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
837                 ireq->ir_iif = tcp_v6_iif(skb);
838
839         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
840             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
841              np->rxopt.bits.rxinfo ||
842              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
843              np->rxopt.bits.rxohlim || np->repflow)) {
844                 refcount_inc(&skb->users);
845                 ireq->pktopts = skb;
846         }
847 }
848
849 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
850                                           struct sk_buff *skb,
851                                           struct flowi *fl,
852                                           struct request_sock *req)
853 {
854         tcp_v6_init_req(req, sk, skb);
855
856         if (security_inet_conn_request(sk, skb, req))
857                 return NULL;
858
859         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
860 }
861
862 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
863         .family         =       AF_INET6,
864         .obj_size       =       sizeof(struct tcp6_request_sock),
865         .rtx_syn_ack    =       tcp_rtx_synack,
866         .send_ack       =       tcp_v6_reqsk_send_ack,
867         .destructor     =       tcp_v6_reqsk_destructor,
868         .send_reset     =       tcp_v6_send_reset,
869         .syn_ack_timeout =      tcp_syn_ack_timeout,
870 };
871
872 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
873         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
874                                 sizeof(struct ipv6hdr),
875 #ifdef CONFIG_TCP_MD5SIG
876         .req_md5_lookup =       tcp_v6_md5_lookup,
877         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
878 #endif
879 #ifdef CONFIG_SYN_COOKIES
880         .cookie_init_seq =      cookie_v6_init_sequence,
881 #endif
882         .route_req      =       tcp_v6_route_req,
883         .init_seq       =       tcp_v6_init_seq,
884         .init_ts_off    =       tcp_v6_init_ts_off,
885         .send_synack    =       tcp_v6_send_synack,
886 };
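/* Worked value for .mss_clamp above: IPV6_MIN_MTU (1280) minus the TCP
 * header (20) and the IPv6 header (40) leaves 1220 bytes, the largest
 * MSS guaranteed to traverse any IPv6 path without fragmentation.
 */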
887
888 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
889                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
890                                  int oif, struct tcp_md5sig_key *key, int rst,
891                                  u8 tclass, __be32 label, u32 priority)
892 {
893         const struct tcphdr *th = tcp_hdr(skb);
894         struct tcphdr *t1;
895         struct sk_buff *buff;
896         struct flowi6 fl6;
897         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
898         struct sock *ctl_sk = net->ipv6.tcp_sk;
899         unsigned int tot_len = sizeof(struct tcphdr);
900         __be32 mrst = 0, *topt;
901         struct dst_entry *dst;
902         __u32 mark = 0;
903
904         if (tsecr)
905                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
906 #ifdef CONFIG_TCP_MD5SIG
907         if (key)
908                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
909 #endif
910
911 #ifdef CONFIG_MPTCP
912         if (rst && !key) {
913                 mrst = mptcp_reset_option(skb);
914
915                 if (mrst)
916                         tot_len += sizeof(__be32);
917         }
918 #endif
919
920         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
921                          GFP_ATOMIC);
922         if (!buff)
923                 return;
924
925         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
926
927         t1 = skb_push(buff, tot_len);
928         skb_reset_transport_header(buff);
929
930         /* Swap the send and the receive. */
931         memset(t1, 0, sizeof(*t1));
932         t1->dest = th->source;
933         t1->source = th->dest;
934         t1->doff = tot_len / 4;
935         t1->seq = htonl(seq);
936         t1->ack_seq = htonl(ack);
937         t1->ack = !rst || !th->ack;
938         t1->rst = rst;
939         t1->window = htons(win);
940
941         topt = (__be32 *)(t1 + 1);
942
943         if (tsecr) {
944                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
945                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
946                 *topt++ = htonl(tsval);
947                 *topt++ = htonl(tsecr);
948         }
949
950         if (mrst)
951                 *topt++ = mrst;
952
953 #ifdef CONFIG_TCP_MD5SIG
954         if (key) {
955                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
956                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
957                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
958                                     &ipv6_hdr(skb)->saddr,
959                                     &ipv6_hdr(skb)->daddr, t1);
960         }
961 #endif
962
963         memset(&fl6, 0, sizeof(fl6));
964         fl6.daddr = ipv6_hdr(skb)->saddr;
965         fl6.saddr = ipv6_hdr(skb)->daddr;
966         fl6.flowlabel = label;
967
968         buff->ip_summed = CHECKSUM_PARTIAL;
969         buff->csum = 0;
970
971         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
972
973         fl6.flowi6_proto = IPPROTO_TCP;
974         if (rt6_need_strict(&fl6.daddr) && !oif) {
975                 fl6.flowi6_oif = tcp_v6_iif(skb);
976         } else {
977                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
978                         oif = skb->skb_iif;
979
980                 fl6.flowi6_oif = oif;
981         }
982
983         if (sk) {
984                 if (sk->sk_state == TCP_TIME_WAIT) {
985                         mark = inet_twsk(sk)->tw_mark;
986                         /* autoflowlabel relies on buff->hash */
987                         skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
988                                      PKT_HASH_TYPE_L4);
989                 } else {
990                         mark = sk->sk_mark;
991                 }
992                 buff->tstamp = tcp_transmit_time(sk);
993         }
994         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
995         fl6.fl6_dport = t1->dest;
996         fl6.fl6_sport = t1->source;
997         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
998         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
999
1000         /* Pass a socket to ip6_dst_lookup_flow() even when it is for an RST.
1001          * The underlying function will use this to retrieve the network
1002          * namespace.
1003          */
1004         dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
1005         if (!IS_ERR(dst)) {
1006                 skb_dst_set(buff, dst);
1007                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1008                          tclass & ~INET_ECN_MASK, priority);
1009                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1010                 if (rst)
1011                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1012                 return;
1013         }
1014
1015         kfree_skb(buff);
1016 }
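/* A note on t1->ack above: per RFC 793 reset generation, a RST sent in
 * reply to a segment that itself carried an ACK must not set the ACK
 * bit, hence ack = !rst || !th->ack; every non-RST reply (timewait and
 * SYN-RECV ACKs) always sets it.
 */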
1017
1018 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1019 {
1020         const struct tcphdr *th = tcp_hdr(skb);
1021         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1022         u32 seq = 0, ack_seq = 0;
1023         struct tcp_md5sig_key *key = NULL;
1024 #ifdef CONFIG_TCP_MD5SIG
1025         const __u8 *hash_location = NULL;
1026         unsigned char newhash[16];
1027         int genhash;
1028         struct sock *sk1 = NULL;
1029 #endif
1030         __be32 label = 0;
1031         u32 priority = 0;
1032         struct net *net;
1033         int oif = 0;
1034
1035         if (th->rst)
1036                 return;
1037
1038         /* If sk is not NULL, we did a successful lookup and the incoming
1039          * route had to be correct. prequeue might have dropped our dst.
1040          */
1041         if (!sk && !ipv6_unicast_destination(skb))
1042                 return;
1043
1044         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1045 #ifdef CONFIG_TCP_MD5SIG
1046         rcu_read_lock();
1047         hash_location = tcp_parse_md5sig_option(th);
1048         if (sk && sk_fullsock(sk)) {
1049                 int l3index;
1050
1051                 /* If sdif is set, the packet ingressed via a device
1052                  * in an L3 domain and inet_iif has been set to it.
1053                  */
1054                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1055                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1056         } else if (hash_location) {
1057                 int dif = tcp_v6_iif_l3_slave(skb);
1058                 int sdif = tcp_v6_sdif(skb);
1059                 int l3index;
1060
1061                 /*
1062                  * The active side is lost. Try to find the listening socket
1063                  * through the source port, then find the md5 key through it.
1064                  * We do not lose any security here:
1065                  * the incoming packet is checked against the md5 hash of the
1066                  * key we find; no RST is generated if the hash doesn't match.
1067                  */
1068                 sk1 = inet6_lookup_listener(net,
1069                                            &tcp_hashinfo, NULL, 0,
1070                                            &ipv6h->saddr,
1071                                            th->source, &ipv6h->daddr,
1072                                            ntohs(th->source), dif, sdif);
1073                 if (!sk1)
1074                         goto out;
1075
1076                 /* If sdif is set, the packet ingressed via a device
1077                  * in an L3 domain and dif has been set to it.
1078                  */
1079                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1080
1081                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1082                 if (!key)
1083                         goto out;
1084
1085                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1086                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1087                         goto out;
1088         }
1089 #endif
1090
1091         if (th->ack)
1092                 seq = ntohl(th->ack_seq);
1093         else
1094                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1095                           (th->doff << 2);
1096
1097         if (sk) {
1098                 oif = sk->sk_bound_dev_if;
1099                 if (sk_fullsock(sk)) {
1100                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1101
1102                         trace_tcp_send_reset(sk, skb);
1103                         if (np->repflow)
1104                                 label = ip6_flowlabel(ipv6h);
1105                         priority = sk->sk_priority;
1106                 }
1107                 if (sk->sk_state == TCP_TIME_WAIT) {
1108                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1109                         priority = inet_twsk(sk)->tw_priority;
1110                 }
1111         } else {
1112                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1113                         label = ip6_flowlabel(ipv6h);
1114         }
1115
1116         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1117                              ipv6_get_dsfield(ipv6h), label, priority);
1118
1119 #ifdef CONFIG_TCP_MD5SIG
1120 out:
1121         rcu_read_unlock();
1122 #endif
1123 }
1124
1125 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1126                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1127                             struct tcp_md5sig_key *key, u8 tclass,
1128                             __be32 label, u32 priority)
1129 {
1130         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1131                              tclass, label, priority);
1132 }
1133
1134 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1135 {
1136         struct inet_timewait_sock *tw = inet_twsk(sk);
1137         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1138
1139         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1140                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1141                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1142                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1143                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1144
1145         inet_twsk_put(tw);
1146 }
1147
1148 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1149                                   struct request_sock *req)
1150 {
1151         int l3index;
1152
1153         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1154
1155         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1156          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1157          */
1158         /* RFC 7323 2.3
1159          * The window field (SEG.WND) of every outgoing segment, with the
1160          * exception of <SYN> segments, MUST be right-shifted by
1161          * Rcv.Wind.Shift bits:
1162          */
1163         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1164                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1165                         tcp_rsk(req)->rcv_nxt,
1166                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1167                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1168                         req->ts_recent, sk->sk_bound_dev_if,
1169                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1170                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1171 }
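/* Worked example of the RFC 7323 shift above: with rsk_rcv_wnd ==
 * 262140 and rcv_wscale == 2, the advertised window field is
 * 262140 >> 2 == 65535; the peer shifts it back up on receipt.
 */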
1172
1173
1174 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1175 {
1176 #ifdef CONFIG_SYN_COOKIES
1177         const struct tcphdr *th = tcp_hdr(skb);
1178
1179         if (!th->syn)
1180                 sk = cookie_v6_check(sk, skb);
1181 #endif
1182         return sk;
1183 }
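/* Note on the check above: a non-SYN segment arriving on a listener can
 * only complete a syncookie handshake, so cookie_v6_check() validates
 * the cookie carried in the ACK and, on success, reconstructs the
 * request and returns the newly created child socket; otherwise the
 * caller drops the segment.
 */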
1184
1185 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1186                          struct tcphdr *th, u32 *cookie)
1187 {
1188         u16 mss = 0;
1189 #ifdef CONFIG_SYN_COOKIES
1190         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1191                                     &tcp_request_sock_ipv6_ops, sk, th);
1192         if (mss) {
1193                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1194                 tcp_synq_overflow(sk);
1195         }
1196 #endif
1197         return mss;
1198 }
1199
1200 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1201 {
1202         if (skb->protocol == htons(ETH_P_IP))
1203                 return tcp_v4_conn_request(sk, skb);
1204
1205         if (!ipv6_unicast_destination(skb))
1206                 goto drop;
1207
1208         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1209                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1210                 return 0;
1211         }
1212
1213         return tcp_conn_request(&tcp6_request_sock_ops,
1214                                 &tcp_request_sock_ipv6_ops, sk, skb);
1215
1216 drop:
1217         tcp_listendrop(sk);
1218         return 0; /* don't send reset */
1219 }
1220
1221 static void tcp_v6_restore_cb(struct sk_buff *skb)
1222 {
1223         /* We need to move header back to the beginning if xfrm6_policy_check()
1224          * and tcp_v6_fill_cb() are going to be called again.
1225          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1226          */
1227         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1228                 sizeof(struct inet6_skb_parm));
1229 }
1230
1231 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1232                                          struct request_sock *req,
1233                                          struct dst_entry *dst,
1234                                          struct request_sock *req_unhash,
1235                                          bool *own_req)
1236 {
1237         struct inet_request_sock *ireq;
1238         struct ipv6_pinfo *newnp;
1239         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1240         struct ipv6_txoptions *opt;
1241         struct inet_sock *newinet;
1242         bool found_dup_sk = false;
1243         struct tcp_sock *newtp;
1244         struct sock *newsk;
1245 #ifdef CONFIG_TCP_MD5SIG
1246         struct tcp_md5sig_key *key;
1247         int l3index;
1248 #endif
1249         struct flowi6 fl6;
1250
1251         if (skb->protocol == htons(ETH_P_IP)) {
1252                 /*
1253                  *      v6 mapped
1254                  */
1255
1256                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1257                                              req_unhash, own_req);
1258
1259                 if (!newsk)
1260                         return NULL;
1261
1262                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1263
1264                 newinet = inet_sk(newsk);
1265                 newnp = tcp_inet6_sk(newsk);
1266                 newtp = tcp_sk(newsk);
1267
1268                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1269
1270                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1271
1272                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1273                 if (sk_is_mptcp(newsk))
1274                         mptcpv6_handle_mapped(newsk, true);
1275                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1276 #ifdef CONFIG_TCP_MD5SIG
1277                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1278 #endif
1279
1280                 newnp->ipv6_mc_list = NULL;
1281                 newnp->ipv6_ac_list = NULL;
1282                 newnp->ipv6_fl_list = NULL;
1283                 newnp->pktoptions  = NULL;
1284                 newnp->opt         = NULL;
1285                 newnp->mcast_oif   = inet_iif(skb);
1286                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1287                 newnp->rcv_flowinfo = 0;
1288                 if (np->repflow)
1289                         newnp->flow_label = 0;
1290
1291                 /*
1292                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1293                  * here, tcp_create_openreq_child now does this for us, see the comment in
1294                  * that function for the gory details. -acme
1295                  */
1296
1297                 /* This is a tricky place. Until this moment the IPv4 tcp
1298                    code worked with the IPv6 icsk.icsk_af_ops.
1299                    Sync it now.
1300                  */
1301                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1302
1303                 return newsk;
1304         }
1305
1306         ireq = inet_rsk(req);
1307
1308         if (sk_acceptq_is_full(sk))
1309                 goto out_overflow;
1310
1311         if (!dst) {
1312                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1313                 if (!dst)
1314                         goto out;
1315         }
1316
1317         newsk = tcp_create_openreq_child(sk, req, skb);
1318         if (!newsk)
1319                 goto out_nonewsk;
1320
1321         /*
1322          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1323          * count here, tcp_create_openreq_child now does this for us, see the
1324          * comment in that function for the gory details. -acme
1325          */
1326
1327         newsk->sk_gso_type = SKB_GSO_TCPV6;
1328         ip6_dst_store(newsk, dst, NULL, NULL);
1329         inet6_sk_rx_dst_set(newsk, skb);
1330
1331         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1332
1333         newtp = tcp_sk(newsk);
1334         newinet = inet_sk(newsk);
1335         newnp = tcp_inet6_sk(newsk);
1336
1337         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1338
1339         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1340         newnp->saddr = ireq->ir_v6_loc_addr;
1341         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1342         newsk->sk_bound_dev_if = ireq->ir_iif;
1343
1344         /* Now IPv6 options...
1345
1346            First: no IPv4 options.
1347          */
1348         newinet->inet_opt = NULL;
1349         newnp->ipv6_mc_list = NULL;
1350         newnp->ipv6_ac_list = NULL;
1351         newnp->ipv6_fl_list = NULL;
1352
1353         /* Clone RX bits */
1354         newnp->rxopt.all = np->rxopt.all;
1355
1356         newnp->pktoptions = NULL;
1357         newnp->opt        = NULL;
1358         newnp->mcast_oif  = tcp_v6_iif(skb);
1359         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1360         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1361         if (np->repflow)
1362                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1363
1364         /* Set ToS of the new socket based upon the value of incoming SYN.
1365          * ECT bits are set later in tcp_init_transfer().
1366          */
1367         if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1368                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1369
1370         /* Clone native IPv6 options from listening socket (if any)
1371
1372            Yes, keeping a reference count would be much more clever,
1373            but it would require one more thing: reattaching optmem
1374            to newsk.
1375          */
1376         opt = ireq->ipv6_opt;
1377         if (!opt)
1378                 opt = rcu_dereference(np->opt);
1379         if (opt) {
1380                 opt = ipv6_dup_options(newsk, opt);
1381                 RCU_INIT_POINTER(newnp->opt, opt);
1382         }
1383         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1384         if (opt)
1385                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1386                                                     opt->opt_flen;
1387
1388         tcp_ca_openreq_child(newsk, dst);
1389
1390         tcp_sync_mss(newsk, dst_mtu(dst));
1391         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1392
1393         tcp_initialize_rcv_mss(newsk);
1394
1395         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1396         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1397
1398 #ifdef CONFIG_TCP_MD5SIG
1399         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1400
1401         /* Copy over the MD5 key from the original socket */
1402         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1403         if (key) {
1404                 /* We're using one, so create a matching key
1405                  * on the newsk structure. If we fail to get
1406                  * memory, then we end up not copying the key
1407                  * across. Shucks.
1408                  */
1409                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1410                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1411                                sk_gfp_mask(sk, GFP_ATOMIC));
1412         }
1413 #endif
1414
1415         if (__inet_inherit_port(sk, newsk) < 0) {
1416                 inet_csk_prepare_forced_close(newsk);
1417                 tcp_done(newsk);
1418                 goto out;
1419         }
1420         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1421                                        &found_dup_sk);
1422         if (*own_req) {
1423                 tcp_move_syn(newtp, req);
1424
1425                 /* Clone pktoptions received with SYN, if we own the req */
1426                 if (ireq->pktopts) {
1427                         newnp->pktoptions = skb_clone(ireq->pktopts,
1428                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1429                         consume_skb(ireq->pktopts);
1430                         ireq->pktopts = NULL;
1431                         if (newnp->pktoptions) {
1432                                 tcp_v6_restore_cb(newnp->pktoptions);
1433                                 skb_set_owner_r(newnp->pktoptions, newsk);
1434                         }
1435                 }
1436         } else {
1437                 if (!req_unhash && found_dup_sk) {
1438                         /* This code path should be executed in the
1439                          * syncookie case only.
1440                          */
1441                         bh_unlock_sock(newsk);
1442                         sock_put(newsk);
1443                         newsk = NULL;
1444                 }
1445         }
1446
1447         return newsk;
1448
1449 out_overflow:
1450         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1451 out_nonewsk:
1452         dst_release(dst);
1453 out:
1454         tcp_listendrop(sk);
1455         return NULL;
1456 }
1457
1458 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1459                                                            u32));
1460 /* The socket must have its spinlock held when we get
1461  * here, unless it is a TCP_LISTEN socket.
1462  *
1463  * We have a potential double-lock case here, so even when
1464  * doing backlog processing we use the BH locking scheme.
1465  * This is because we cannot sleep with the original spinlock
1466  * held.
1467  */
1468 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1469 {
1470         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1471         struct sk_buff *opt_skb = NULL;
1472         struct tcp_sock *tp;
1473
1474         /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1475            goes to the IPv4 receive handler and is backlogged.
1476            From the backlog it always ends up here. Kerboom...
1477            Fortunately, tcp_rcv_established and rcv_established
1478            handle such packets correctly, but that is not the case
1479            with tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1480          */
1481
1482         if (skb->protocol == htons(ETH_P_IP))
1483                 return tcp_v4_do_rcv(sk, skb);
1484
1485         /*
1486          *      socket locking is here for SMP purposes as backlog rcv
1487          *      is currently called with bh processing disabled.
1488          */
1489
1490         /* Do Stevens' IPV6_PKTOPTIONS.
1491
1492            Yes, this is the only place in our code where we
1493            can handle it without affecting IPv4.
1494            The rest of the code is protocol independent,
1495            and I do not like the idea of uglifying IPv4.
1496
1497            Actually, the whole idea behind IPV6_PKTOPTIONS
1498            does not look very well thought out. For now we latch
1499            the options received in the last packet enqueued
1500            by tcp. Feel free to propose a better solution.
1501                                                --ANK (980728)
1502          */
1503         if (np->rxopt.all)
1504                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1505
1506         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1507                 struct dst_entry *dst;
1508
1509                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1510                                                 lockdep_sock_is_held(sk));
1511
1512                 sock_rps_save_rxhash(sk, skb);
1513                 sk_mark_napi_id(sk, skb);
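                /* Validate the cached input route: drop it if the packet
                 * arrived on a different interface or if the route has since
                 * been invalidated (dst->ops->check fails).
                 */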
1514                 if (dst) {
1515                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1516                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1517                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1518                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1519                                 dst_release(dst);
1520                         }
1521                 }
1522
1523                 tcp_rcv_established(sk, skb);
1524                 if (opt_skb)
1525                         goto ipv6_pktoptions;
1526                 return 0;
1527         }
1528
1529         if (tcp_checksum_complete(skb))
1530                 goto csum_err;
1531
1532         if (sk->sk_state == TCP_LISTEN) {
1533                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1534
1535                 if (!nsk)
1536                         goto discard;
1537
1538                 if (nsk != sk) {
1539                         if (tcp_child_process(sk, nsk, skb))
1540                                 goto reset;
1541                         if (opt_skb)
1542                                 __kfree_skb(opt_skb);
1543                         return 0;
1544                 }
1545         } else
1546                 sock_rps_save_rxhash(sk, skb);
1547
1548         if (tcp_rcv_state_process(sk, skb))
1549                 goto reset;
1550         if (opt_skb)
1551                 goto ipv6_pktoptions;
1552         return 0;
1553
1554 reset:
1555         tcp_v6_send_reset(sk, skb);
1556 discard:
1557         if (opt_skb)
1558                 __kfree_skb(opt_skb);
1559         kfree_skb(skb);
1560         return 0;
1561 csum_err:
1562         trace_tcp_bad_csum(skb);
1563         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1564         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1565         goto discard;
1566
1568 ipv6_pktoptions:
1569         /* How do we get here? All of the following hold:
1570
1571            1. the skb was enqueued by tcp.
1572            2. the skb was added to the tail of the read queue, not out of order.
1573            3. the socket is not in a passive state.
1574            4. the skb really contains options which the user wants to receive.
1575          */
1576         tp = tcp_sk(sk);
1577         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1578             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1579                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1580                         np->mcast_oif = tcp_v6_iif(opt_skb);
1581                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1582                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1583                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1584                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1585                 if (np->repflow)
1586                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1587                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1588                         skb_set_owner_r(opt_skb, sk);
1589                         tcp_v6_restore_cb(opt_skb);
1590                         opt_skb = xchg(&np->pktoptions, opt_skb);
1591                 } else {
1592                         __kfree_skb(opt_skb);
1593                         opt_skb = xchg(&np->pktoptions, NULL);
1594                 }
1595         }
1596
1597         kfree_skb(opt_skb);
1598         return 0;
1599 }
1600
1601 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1602                            const struct tcphdr *th)
1603 {
1604         /* This is tricky: we move IP6CB at its correct location into
1605          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1606          * _decode_session6() uses IP6CB().
1607          * barrier() makes sure compiler won't play aliasing games.
1608          */
1609         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1610                 sizeof(struct inet6_skb_parm));
1611         barrier();
1612
1613         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1614         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1615                                     skb->len - th->doff*4);
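        /* In TCP sequence space, SYN and FIN each consume one sequence
         * number, hence the th->syn + th->fin terms above; a pure ACK
         * carrying no data has end_seq == seq.
         */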
1616         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1617         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1618         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1619         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1620         TCP_SKB_CB(skb)->sacked = 0;
1621         TCP_SKB_CB(skb)->has_rxtstamp =
1622                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1623 }
1624
1625 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1626 {
1627         struct sk_buff *skb_to_free;
1628         int sdif = inet6_sdif(skb);
1629         int dif = inet6_iif(skb);
1630         const struct tcphdr *th;
1631         const struct ipv6hdr *hdr;
1632         bool refcounted;
1633         struct sock *sk;
1634         int ret;
1635         struct net *net = dev_net(skb->dev);
1636
1637         if (skb->pkt_type != PACKET_HOST)
1638                 goto discard_it;
1639
1640         /*
1641          *      Count it even if it's bad.
1642          */
1643         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1644
1645         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1646                 goto discard_it;
1647
1648         th = (const struct tcphdr *)skb->data;
1649
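        /* th->doff counts 32-bit words, so anything below 5 (20 bytes)
         * cannot be a valid TCP header; then make sure the full header,
         * including options, is in the linear area before touching it.
         */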
1650         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1651                 goto bad_packet;
1652         if (!pskb_may_pull(skb, th->doff*4))
1653                 goto discard_it;
1654
1655         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1656                 goto csum_error;
1657
1658         th = (const struct tcphdr *)skb->data;
1659         hdr = ipv6_hdr(skb);
1660
1661 lookup:
1662         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1663                                 th->source, th->dest, inet6_iif(skb), sdif,
1664                                 &refcounted);
1665         if (!sk)
1666                 goto no_tcp_socket;
1667
1668 process:
1669         if (sk->sk_state == TCP_TIME_WAIT)
1670                 goto do_time_wait;
1671
1672         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1673                 struct request_sock *req = inet_reqsk(sk);
1674                 bool req_stolen = false;
1675                 struct sock *nsk;
1676
1677                 sk = req->rsk_listener;
1678                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1679                         sk_drops_add(sk, skb);
1680                         reqsk_put(req);
1681                         goto discard_it;
1682                 }
1683                 if (tcp_checksum_complete(skb)) {
1684                         reqsk_put(req);
1685                         goto csum_error;
1686                 }
1687                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1688                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1689                         if (!nsk) {
1690                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1691                                 goto lookup;
1692                         }
1693                         sk = nsk;
1694                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1695                          * before returning.
1696                          */
1697                 } else {
1698                         sock_hold(sk);
1699                 }
1700                 refcounted = true;
1701                 nsk = NULL;
1702                 if (!tcp_filter(sk, skb)) {
1703                         th = (const struct tcphdr *)skb->data;
1704                         hdr = ipv6_hdr(skb);
1705                         tcp_v6_fill_cb(skb, hdr, th);
1706                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1707                 }
1708                 if (!nsk) {
1709                         reqsk_put(req);
1710                         if (req_stolen) {
1711                                 /* Another cpu got exclusive access to req
1712                                  * and created a full blown socket.
1713                                  * Try to feed this packet to this socket
1714                                  * instead of discarding it.
1715                                  */
1716                                 tcp_v6_restore_cb(skb);
1717                                 sock_put(sk);
1718                                 goto lookup;
1719                         }
1720                         goto discard_and_relse;
1721                 }
1722                 if (nsk == sk) {
1723                         reqsk_put(req);
1724                         tcp_v6_restore_cb(skb);
1725                 } else if (tcp_child_process(sk, nsk, skb)) {
1726                         tcp_v6_send_reset(nsk, skb);
1727                         goto discard_and_relse;
1728                 } else {
1729                         sock_put(sk);
1730                         return 0;
1731                 }
1732         }
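        /* IPV6_MINHOPCOUNT (generalized TTL security, RFC 5082): drop
         * packets whose hop limit is below the minimum configured on the
         * socket.
         */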
1733         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1734                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1735                 goto discard_and_relse;
1736         }
1737
1738         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1739                 goto discard_and_relse;
1740
1741         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1742                 goto discard_and_relse;
1743
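        /* tcp_filter() runs the socket filter, which may trim the skb and
         * reallocate its head, so reload th and hdr afterwards.
         */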
1744         if (tcp_filter(sk, skb))
1745                 goto discard_and_relse;
1746         th = (const struct tcphdr *)skb->data;
1747         hdr = ipv6_hdr(skb);
1748         tcp_v6_fill_cb(skb, hdr, th);
1749
1750         skb->dev = NULL;
1751
1752         if (sk->sk_state == TCP_LISTEN) {
1753                 ret = tcp_v6_do_rcv(sk, skb);
1754                 goto put_and_return;
1755         }
1756
1757         sk_incoming_cpu_update(sk);
1758
1759         bh_lock_sock_nested(sk);
1760         tcp_segs_in(tcp_sk(sk), skb);
1761         ret = 0;
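        /* If no user context owns the socket, process the segment right
         * away and steal sk_rx_skb_cache so the cached skb can be freed
         * after the lock is released; otherwise defer the segment to the
         * backlog for the owner to process.
         */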
1762         if (!sock_owned_by_user(sk)) {
1763                 skb_to_free = sk->sk_rx_skb_cache;
1764                 sk->sk_rx_skb_cache = NULL;
1765                 ret = tcp_v6_do_rcv(sk, skb);
1766         } else {
1767                 if (tcp_add_backlog(sk, skb))
1768                         goto discard_and_relse;
1769                 skb_to_free = NULL;
1770         }
1771         bh_unlock_sock(sk);
1772         if (skb_to_free)
1773                 __kfree_skb(skb_to_free);
1774 put_and_return:
1775         if (refcounted)
1776                 sock_put(sk);
1777         return ret ? -1 : 0;
1778
1779 no_tcp_socket:
1780         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1781                 goto discard_it;
1782
1783         tcp_v6_fill_cb(skb, hdr, th);
1784
1785         if (tcp_checksum_complete(skb)) {
1786 csum_error:
1787                 trace_tcp_bad_csum(skb);
1788                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1789 bad_packet:
1790                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1791         } else {
1792                 tcp_v6_send_reset(NULL, skb);
1793         }
1794
1795 discard_it:
1796         kfree_skb(skb);
1797         return 0;
1798
1799 discard_and_relse:
1800         sk_drops_add(sk, skb);
1801         if (refcounted)
1802                 sock_put(sk);
1803         goto discard_it;
1804
1805 do_time_wait:
1806         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1807                 inet_twsk_put(inet_twsk(sk));
1808                 goto discard_it;
1809         }
1810
1811         tcp_v6_fill_cb(skb, hdr, th);
1812
1813         if (tcp_checksum_complete(skb)) {
1814                 inet_twsk_put(inet_twsk(sk));
1815                 goto csum_error;
1816         }
1817
1818         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1819         case TCP_TW_SYN:
1820         {
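                /* A SYN aimed at a TIME_WAIT socket: if a matching listener
                 * exists, retire the timewait socket and let the listener
                 * handle the SYN, so the old connection can be reopened.
                 */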
1821                 struct sock *sk2;
1822
1823                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1824                                             skb, __tcp_hdrlen(th),
1825                                             &ipv6_hdr(skb)->saddr, th->source,
1826                                             &ipv6_hdr(skb)->daddr,
1827                                             ntohs(th->dest),
1828                                             tcp_v6_iif_l3_slave(skb),
1829                                             sdif);
1830                 if (sk2) {
1831                         struct inet_timewait_sock *tw = inet_twsk(sk);
1832                         inet_twsk_deschedule_put(tw);
1833                         sk = sk2;
1834                         tcp_v6_restore_cb(skb);
1835                         refcounted = false;
1836                         goto process;
1837                 }
1838         }
1839                 /* to ACK */
1840                 fallthrough;
1841         case TCP_TW_ACK:
1842                 tcp_v6_timewait_ack(sk, skb);
1843                 break;
1844         case TCP_TW_RST:
1845                 tcp_v6_send_reset(sk, skb);
1846                 inet_twsk_deschedule_put(inet_twsk(sk));
1847                 goto discard_it;
1848         case TCP_TW_SUCCESS:
1849                 ;
1850         }
1851         goto discard_it;
1852 }
1853
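/* Early demux: look up the established socket while the packet is still at
 * the IP layer, so the socket's cached dst can be attached to the skb and
 * the normal route lookup skipped.
 */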
1854 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1855 {
1856         const struct ipv6hdr *hdr;
1857         const struct tcphdr *th;
1858         struct sock *sk;
1859
1860         if (skb->pkt_type != PACKET_HOST)
1861                 return;
1862
1863         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1864                 return;
1865
1866         hdr = ipv6_hdr(skb);
1867         th = tcp_hdr(skb);
1868
1869         if (th->doff < sizeof(struct tcphdr) / 4)
1870                 return;
1871
1872         /* Note: we use inet6_iif() here, not tcp_v6_iif(). */
1873         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1874                                         &hdr->saddr, th->source,
1875                                         &hdr->daddr, ntohs(th->dest),
1876                                         inet6_iif(skb), inet6_sdif(skb));
1877         if (sk) {
1878                 skb->sk = sk;
1879                 skb->destructor = sock_edemux;
1880                 if (sk_fullsock(sk)) {
1881                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1882
1883                         if (dst)
1884                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1885                         if (dst &&
1886                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1887                                 skb_dst_set_noref(skb, dst);
1888                 }
1889         }
1890 }
1891
1892 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1893         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1894         .twsk_unique    = tcp_twsk_unique,
1895         .twsk_destructor = tcp_twsk_destructor,
1896 };
1897
1898 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1899 {
1900         struct ipv6_pinfo *np = inet6_sk(sk);
1901
1902         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1903 }
1904
1905 const struct inet_connection_sock_af_ops ipv6_specific = {
1906         .queue_xmit        = inet6_csk_xmit,
1907         .send_check        = tcp_v6_send_check,
1908         .rebuild_header    = inet6_sk_rebuild_header,
1909         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1910         .conn_request      = tcp_v6_conn_request,
1911         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1912         .net_header_len    = sizeof(struct ipv6hdr),
1913         .net_frag_header_len = sizeof(struct frag_hdr),
1914         .setsockopt        = ipv6_setsockopt,
1915         .getsockopt        = ipv6_getsockopt,
1916         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1917         .sockaddr_len      = sizeof(struct sockaddr_in6),
1918         .mtu_reduced       = tcp_v6_mtu_reduced,
1919 };
1920
1921 #ifdef CONFIG_TCP_MD5SIG
1922 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1923         .md5_lookup     =       tcp_v6_md5_lookup,
1924         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1925         .md5_parse      =       tcp_v6_parse_md5_keys,
1926 };
1927 #endif
1928
1929 /*
1930  *      TCP over IPv4 via INET6 API
1931  */
1932 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1933         .queue_xmit        = ip_queue_xmit,
1934         .send_check        = tcp_v4_send_check,
1935         .rebuild_header    = inet_sk_rebuild_header,
1936         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1937         .conn_request      = tcp_v6_conn_request,
1938         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1939         .net_header_len    = sizeof(struct iphdr),
1940         .setsockopt        = ipv6_setsockopt,
1941         .getsockopt        = ipv6_getsockopt,
1942         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1943         .sockaddr_len      = sizeof(struct sockaddr_in6),
1944         .mtu_reduced       = tcp_v4_mtu_reduced,
1945 };
1946
1947 #ifdef CONFIG_TCP_MD5SIG
1948 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1949         .md5_lookup     =       tcp_v4_md5_lookup,
1950         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1951         .md5_parse      =       tcp_v6_parse_md5_keys,
1952 };
1953 #endif
1954
1955 /* NOTE: A lot of things are set to zero explicitly by the call to
1956  *       sk_alloc(), so they need not be done here.
1957  */
1958 static int tcp_v6_init_sock(struct sock *sk)
1959 {
1960         struct inet_connection_sock *icsk = inet_csk(sk);
1961
1962         tcp_init_sock(sk);
1963
1964         icsk->icsk_af_ops = &ipv6_specific;
1965
1966 #ifdef CONFIG_TCP_MD5SIG
1967         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1968 #endif
1969
1970         return 0;
1971 }
1972
1973 static void tcp_v6_destroy_sock(struct sock *sk)
1974 {
1975         tcp_v4_destroy_sock(sk);
1976         inet6_destroy_sock(sk);
1977 }
1978
1979 #ifdef CONFIG_PROC_FS
1980 /* Proc filesystem TCPv6 sock list dumping. */
1981 static void get_openreq6(struct seq_file *seq,
1982                          const struct request_sock *req, int i)
1983 {
1984         long ttd = req->rsk_timer.expires - jiffies;
1985         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1986         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1987
1988         if (ttd < 0)
1989                 ttd = 0;
1990
1991         seq_printf(seq,
1992                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1993                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1994                    i,
1995                    src->s6_addr32[0], src->s6_addr32[1],
1996                    src->s6_addr32[2], src->s6_addr32[3],
1997                    inet_rsk(req)->ir_num,
1998                    dest->s6_addr32[0], dest->s6_addr32[1],
1999                    dest->s6_addr32[2], dest->s6_addr32[3],
2000                    ntohs(inet_rsk(req)->ir_rmt_port),
2001                    TCP_SYN_RECV,
2002                    0, 0, /* could print option size, but that is af dependent. */
2003                    1,   /* timers active (only the expire timer) */
2004                    jiffies_to_clock_t(ttd),
2005                    req->num_timeout,
2006                    from_kuid_munged(seq_user_ns(seq),
2007                                     sock_i_uid(req->rsk_listener)),
2008                    0,  /* non standard timer */
2009                    0, /* open_requests have no inode */
2010                    0, req);
2011 }
2012
2013 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2014 {
2015         const struct in6_addr *dest, *src;
2016         __u16 destp, srcp;
2017         int timer_active;
2018         unsigned long timer_expires;
2019         const struct inet_sock *inet = inet_sk(sp);
2020         const struct tcp_sock *tp = tcp_sk(sp);
2021         const struct inet_connection_sock *icsk = inet_csk(sp);
2022         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2023         int rx_queue;
2024         int state;
2025
2026         dest  = &sp->sk_v6_daddr;
2027         src   = &sp->sk_v6_rcv_saddr;
2028         destp = ntohs(inet->inet_dport);
2029         srcp  = ntohs(inet->inet_sport);
2030
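        /* timer_active encodes which timer is pending, matching the codes
         * reported in /proc/net/tcp: 1 retransmit/loss probe, 2 keepalive
         * (sk_timer), 4 zero-window probe; 0 means no timer is running.
         */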
2031         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2032             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2033             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2034                 timer_active    = 1;
2035                 timer_expires   = icsk->icsk_timeout;
2036         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2037                 timer_active    = 4;
2038                 timer_expires   = icsk->icsk_timeout;
2039         } else if (timer_pending(&sp->sk_timer)) {
2040                 timer_active    = 2;
2041                 timer_expires   = sp->sk_timer.expires;
2042         } else {
2043                 timer_active    = 0;
2044                 timer_expires = jiffies;
2045         }
2046
2047         state = inet_sk_state_load(sp);
2048         if (state == TCP_LISTEN)
2049                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2050         else
2051                 /* Because we don't lock the socket,
2052                  * we might find a transient negative value.
2053                  */
2054                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2055                                       READ_ONCE(tp->copied_seq), 0);
2056
2057         seq_printf(seq,
2058                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2059                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2060                    i,
2061                    src->s6_addr32[0], src->s6_addr32[1],
2062                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2063                    dest->s6_addr32[0], dest->s6_addr32[1],
2064                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2065                    state,
2066                    READ_ONCE(tp->write_seq) - tp->snd_una,
2067                    rx_queue,
2068                    timer_active,
2069                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2070                    icsk->icsk_retransmits,
2071                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2072                    icsk->icsk_probes_out,
2073                    sock_i_ino(sp),
2074                    refcount_read(&sp->sk_refcnt), sp,
2075                    jiffies_to_clock_t(icsk->icsk_rto),
2076                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2077                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2078                    tcp_snd_cwnd(tp),
2079                    state == TCP_LISTEN ?
2080                         fastopenq->max_qlen :
2081                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2082                    );
2083 }
2084
2085 static void get_timewait6_sock(struct seq_file *seq,
2086                                struct inet_timewait_sock *tw, int i)
2087 {
2088         long delta = tw->tw_timer.expires - jiffies;
2089         const struct in6_addr *dest, *src;
2090         __u16 destp, srcp;
2091
2092         dest = &tw->tw_v6_daddr;
2093         src  = &tw->tw_v6_rcv_saddr;
2094         destp = ntohs(tw->tw_dport);
2095         srcp  = ntohs(tw->tw_sport);
2096
2097         seq_printf(seq,
2098                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2099                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2100                    i,
2101                    src->s6_addr32[0], src->s6_addr32[1],
2102                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2103                    dest->s6_addr32[0], dest->s6_addr32[1],
2104                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2105                    tw->tw_substate, 0, 0,
2106                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2107                    refcount_read(&tw->tw_refcnt), tw);
2108 }
2109
2110 static int tcp6_seq_show(struct seq_file *seq, void *v)
2111 {
2112         struct tcp_iter_state *st;
2113         struct sock *sk = v;
2114
2115         if (v == SEQ_START_TOKEN) {
2116                 seq_puts(seq,
2117                          "  sl  "
2118                          "local_address                         "
2119                          "remote_address                        "
2120                          "st tx_queue rx_queue tr tm->when retrnsmt"
2121                          "   uid  timeout inode\n");
2122                 goto out;
2123         }
2124         st = seq->private;
2125
2126         if (sk->sk_state == TCP_TIME_WAIT)
2127                 get_timewait6_sock(seq, v, st->num);
2128         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2129                 get_openreq6(seq, v, st->num);
2130         else
2131                 get_tcp6_sock(seq, v, st->num);
2132 out:
2133         return 0;
2134 }
2135
2136 static const struct seq_operations tcp6_seq_ops = {
2137         .show           = tcp6_seq_show,
2138         .start          = tcp_seq_start,
2139         .next           = tcp_seq_next,
2140         .stop           = tcp_seq_stop,
2141 };
2142
2143 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2144         .family         = AF_INET6,
2145 };
2146
2147 int __net_init tcp6_proc_init(struct net *net)
2148 {
2149         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2150                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2151                 return -ENOMEM;
2152         return 0;
2153 }
2154
2155 void tcp6_proc_exit(struct net *net)
2156 {
2157         remove_proc_entry("tcp6", net->proc_net);
2158 }
2159 #endif
2160
2161 struct proto tcpv6_prot = {
2162         .name                   = "TCPv6",
2163         .owner                  = THIS_MODULE,
2164         .close                  = tcp_close,
2165         .pre_connect            = tcp_v6_pre_connect,
2166         .connect                = tcp_v6_connect,
2167         .disconnect             = tcp_disconnect,
2168         .accept                 = inet_csk_accept,
2169         .ioctl                  = tcp_ioctl,
2170         .init                   = tcp_v6_init_sock,
2171         .destroy                = tcp_v6_destroy_sock,
2172         .shutdown               = tcp_shutdown,
2173         .setsockopt             = tcp_setsockopt,
2174         .getsockopt             = tcp_getsockopt,
2175         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2176         .keepalive              = tcp_set_keepalive,
2177         .recvmsg                = tcp_recvmsg,
2178         .sendmsg                = tcp_sendmsg,
2179         .sendpage               = tcp_sendpage,
2180         .backlog_rcv            = tcp_v6_do_rcv,
2181         .release_cb             = tcp_release_cb,
2182         .hash                   = inet6_hash,
2183         .unhash                 = inet_unhash,
2184         .get_port               = inet_csk_get_port,
2185 #ifdef CONFIG_BPF_SYSCALL
2186         .psock_update_sk_prot   = tcp_bpf_update_proto,
2187 #endif
2188         .enter_memory_pressure  = tcp_enter_memory_pressure,
2189         .leave_memory_pressure  = tcp_leave_memory_pressure,
2190         .stream_memory_free     = tcp_stream_memory_free,
2191         .sockets_allocated      = &tcp_sockets_allocated,
2192         .memory_allocated       = &tcp_memory_allocated,
2193         .memory_pressure        = &tcp_memory_pressure,
2194         .orphan_count           = &tcp_orphan_count,
2195         .sysctl_mem             = sysctl_tcp_mem,
2196         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2197         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2198         .max_header             = MAX_TCP_HEADER,
2199         .obj_size               = sizeof(struct tcp6_sock),
2200         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2201         .twsk_prot              = &tcp6_timewait_sock_ops,
2202         .rsk_prot               = &tcp6_request_sock_ops,
2203         .h.hashinfo             = &tcp_hashinfo,
2204         .no_autobind            = true,
2205         .diag_destroy           = tcp_abort,
2206 };
2207 EXPORT_SYMBOL_GPL(tcpv6_prot);
2208
2209 /* thinking of making this const? Don't.
2210  * early_demux can change based on sysctl.
2211  */
2212 static struct inet6_protocol tcpv6_protocol = {
2213         .early_demux    =       tcp_v6_early_demux,
2214         .early_demux_handler =  tcp_v6_early_demux,
2215         .handler        =       tcp_v6_rcv,
2216         .err_handler    =       tcp_v6_err,
2217         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2218 };
2219
2220 static struct inet_protosw tcpv6_protosw = {
2221         .type           =       SOCK_STREAM,
2222         .protocol       =       IPPROTO_TCP,
2223         .prot           =       &tcpv6_prot,
2224         .ops            =       &inet6_stream_ops,
2225         .flags          =       INET_PROTOSW_PERMANENT |
2226                                 INET_PROTOSW_ICSK,
2227 };
2228
2229 static int __net_init tcpv6_net_init(struct net *net)
2230 {
2231         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2232                                     SOCK_RAW, IPPROTO_TCP, net);
2233 }
2234
2235 static void __net_exit tcpv6_net_exit(struct net *net)
2236 {
2237         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2238 }
2239
2240 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2241 {
2242         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2243 }
2244
2245 static struct pernet_operations tcpv6_net_ops = {
2246         .init       = tcpv6_net_init,
2247         .exit       = tcpv6_net_exit,
2248         .exit_batch = tcpv6_net_exit_batch,
2249 };
2250
2251 int __init tcpv6_init(void)
2252 {
2253         int ret;
2254
2255         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2256         if (ret)
2257                 goto out;
2258
2259         /* register inet6 protocol */
2260         ret = inet6_register_protosw(&tcpv6_protosw);
2261         if (ret)
2262                 goto out_tcpv6_protocol;
2263
2264         ret = register_pernet_subsys(&tcpv6_net_ops);
2265         if (ret)
2266                 goto out_tcpv6_protosw;
2267
2268         ret = mptcpv6_init();
2269         if (ret)
2270                 goto out_tcpv6_pernet_subsys;
2271
2272 out:
2273         return ret;
2274
2275 out_tcpv6_pernet_subsys:
2276         unregister_pernet_subsys(&tcpv6_net_ops);
2277 out_tcpv6_protosw:
2278         inet6_unregister_protosw(&tcpv6_protosw);
2279 out_tcpv6_protocol:
2280         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2281         goto out;
2282 }
2283
2284 void tcpv6_exit(void)
2285 {
2286         unregister_pernet_subsys(&tcpv6_net_ops);
2287         inet6_unregister_protosw(&tcpv6_protosw);
2288         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2289 }