1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allow compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
/* The ipv6_pinfo sits at the tail of struct tcp6_sock, so its address is
 * just the socket pointer plus a compile-time constant offset — no extra
 * pointer load through inet_sk(sk)->pinet6.
 */
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
/* Cache the incoming skb's dst (and its validity cookie) on the socket,
 * so later packets on this flow can reuse it. Only done when a reference
 * on the dst can be taken safely via dst_hold_safe().
 */
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
105 struct dst_entry *dst = skb_dst(skb);
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
/* Remember the ingress ifindex and route cookie so the cached dst can
 * be revalidated later.
 */
111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial TCP sequence number for a connection from the
 * address/port 4-tuple of the incoming skb (secure, per secure_tcpv6_seq).
 */
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
121 tcp_hdr(skb)->source)
/* Per-flow timestamp offset, derived from the address pair so each flow
 * sees a distinct-but-stable TS base.
 */
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
/* Hook run before connect() proper: validates the user-supplied address
 * length, then gives cgroup-BPF programs a chance to inspect/rewrite the
 * destination address.
 */
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent BPF program called below from accessing bytes that are out
135 * of the bound specified by user in addr_len.
137 if (addr_len < SIN6_LEN_RFC2133)
/* Caller must hold the socket lock. */
140 sock_owned_by_me(sk);
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/* Active open (connect()) for an IPv6 TCP socket: validate the address,
 * resolve flow label / scope, route the flow, pick a source address, hash
 * the socket into the established table and send the SYN. V4-mapped
 * destinations are handed off to tcp_v4_connect().
 * NOTE(review): this excerpt is missing several lines (gotos/labels/braces);
 * comments below describe only the visible statements.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 struct in6_addr *saddr = NULL, *final_p, final;
154 struct ipv6_txoptions *opt;
156 struct dst_entry *dst;
159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
161 if (addr_len < SIN6_LEN_RFC2133)
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
167 memset(&fl6, 0, sizeof(fl6));
/* Honor a flow label supplied by the caller in sin6_flowinfo; if one is
 * set, it must correspond to a label owned by this socket.
 */
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
177 fl6_sock_release(flowlabel);
182 * connect() to INADDR_ANY means loopback (BSD'ism).
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
190 usin->sin6_addr = in6addr_loopback;
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
195 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a scope: either sin6_scope_id (which must
 * agree with any existing binding) or an already-bound device.
 */
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer: stale PAWS/timestamp state and the
 * old write_seq must not leak into the new connection.
 */
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
/* V4-mapped destination: flip the socket's af_ops/backlog handler to the
 * mapped (IPv4) variants and delegate to tcp_v4_connect(). On failure the
 * visible code restores the IPv6 ops.
 */
229 if (addr_type & IPV6_ADDR_MAPPED) {
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
233 if (__ipv6_only_sock(sk))
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240 icsk->icsk_af_ops = &ipv6_mapped;
242 mptcpv6_handle_mapped(sk, true);
243 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
251 icsk->icsk_ext_hdr_len = exthdrlen;
252 icsk->icsk_af_ops = &ipv6_specific;
254 mptcpv6_handle_mapped(sk, false);
255 sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 tp->af_specific = &tcp_sock_ipv6_specific;
261 np->saddr = sk->sk_v6_rcv_saddr;
266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow key and route it; fl6_update_dst() lets tx options
 * (e.g. routing headers) override the final destination.
 */
269 fl6.flowi6_proto = IPPROTO_TCP;
270 fl6.daddr = sk->sk_v6_daddr;
271 fl6.saddr = saddr ? *saddr : np->saddr;
272 fl6.flowi6_oif = sk->sk_bound_dev_if;
273 fl6.flowi6_mark = sk->sk_mark;
274 fl6.fl6_dport = usin->sin6_port;
275 fl6.fl6_sport = inet->inet_sport;
276 fl6.flowi6_uid = sk->sk_uid;
278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 final_p = fl6_update_dst(&fl6, opt, &final);
281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
291 sk->sk_v6_rcv_saddr = *saddr;
294 /* set the source address */
296 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298 sk->sk_gso_type = SKB_GSO_TCPV6;
299 ip6_dst_store(sk, dst, NULL, NULL);
301 icsk->icsk_ext_hdr_len = 0;
303 icsk->icsk_ext_hdr_len = opt->opt_flen +
/* Clamp MSS so a SYN always fits the IPv6 minimum MTU (1280). */
306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308 inet->inet_dport = usin->sin6_port;
310 tcp_set_state(sk, TCP_SYN_SENT);
311 err = inet6_hash_connect(tcp_death_row, sk);
/* Non-repair sockets get a fresh secure ISN and timestamp offset; repair
 * mode preserves the values restored from userspace.
 */
317 if (likely(!tp->repair)) {
319 WRITE_ONCE(tp->write_seq,
320 secure_tcpv6_seq(np->saddr.s6_addr32,
321 sk->sk_v6_daddr.s6_addr32,
324 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 sk->sk_v6_daddr.s6_addr32);
329 if (tcp_fastopen_defer_connect(sk, &err))
334 err = tcp_connect(sk);
/* Failure path: undo the state transition and port assignment so the
 * socket can be reused.
 */
341 tcp_set_state(sk, TCP_CLOSE);
343 inet->inet_dport = 0;
344 sk->sk_route_caps = 0;
/* React to a PMTU decrease recorded in tp->mtu_info: update the route's
 * PMTU state and, if our cached pmtu cookie is now too large, shrink the
 * MSS and retransmit. No-op for LISTEN/CLOSE sockets.
 */
348 static void tcp_v6_mtu_reduced(struct sock *sk)
350 struct dst_entry *dst;
353 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
/* mtu_info is written by tcp_v6_err() possibly from another context,
 * hence the READ_ONCE pairing with its WRITE_ONCE.
 */
356 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
358 /* Drop requests trying to increase our current mss.
359 * Check done in __ip6_rt_update_pmtu() is too late.
361 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
364 dst = inet6_csk_update_pmtu(sk, mtu);
368 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
369 tcp_sync_mss(sk, dst_mtu(dst));
370 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket for the offending
 * segment and react per error type (redirect, packet-too-big/PMTU,
 * destination unreachable, ...). Handles TIME_WAIT, NEW_SYN_RECV
 * (request sock) and full sockets separately.
 * NOTE(review): excerpt is missing lines (locks/labels); comments cover
 * only visible statements.
 */
374 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
375 u8 type, u8 code, int offset, __be32 info)
377 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
378 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
379 struct net *net = dev_net(skb->dev);
380 struct request_sock *fastopen;
381 struct ipv6_pinfo *np;
388 sk = __inet6_lookup_established(net, &tcp_hashinfo,
389 &hdr->daddr, th->dest,
390 &hdr->saddr, ntohs(th->source),
391 skb->dev->ifindex, inet6_sdif(skb));
394 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
399 if (sk->sk_state == TCP_TIME_WAIT) {
400 inet_twsk_put(inet_twsk(sk));
403 seq = ntohl(th->seq);
404 fatal = icmpv6_err_convert(type, code, &err);
/* Request sockets (SYN_RECV minisocks) are handled by tcp_req_err(). */
405 if (sk->sk_state == TCP_NEW_SYN_RECV) {
406 tcp_req_err(sk, seq, fatal);
/* PKT_TOOBIG is still processed under user lock (deferred); anything
 * else while user owns the socket is only counted.
 */
411 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
412 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS)
414 if (sk->sk_state == TCP_CLOSE)
/* Enforce the socket's minimum hop-limit (IP_MINTTL-style defense). */
417 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
418 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
423 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
424 fastopen = rcu_dereference(tp->fastopen_rsk);
425 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* Ignore ICMP errors quoting a sequence outside our send window. */
426 if (sk->sk_state != TCP_LISTEN &&
427 !between(seq, snd_una, tp->snd_nxt)) {
428 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
432 np = tcp_inet6_sk(sk);
434 if (type == NDISC_REDIRECT) {
435 if (!sock_owned_by_user(sk)) {
436 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
439 dst->ops->redirect(dst, sk, skb);
444 if (type == ICMPV6_PKT_TOOBIG) {
445 u32 mtu = ntohl(info);
447 /* We are not interested in TCP_LISTEN and open_requests
448 * (SYN-ACKs send out by Linux are always <576bytes so
449 * they should go through unfragmented).
451 if (sk->sk_state == TCP_LISTEN)
454 if (!ip6_sk_accept_pmtu(sk))
457 if (mtu < IPV6_MIN_MTU)
/* Record new MTU; apply now if we own the socket, otherwise defer via
 * the TCP_MTU_REDUCED_DEFERRED flag for release_sock() to process.
 */
460 WRITE_ONCE(tp->mtu_info, mtu);
462 if (!sock_owned_by_user(sk))
463 tcp_v6_mtu_reduced(sk);
464 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
471 /* Might be for an request_sock */
472 switch (sk->sk_state) {
475 /* Only in fast or simultaneous open. If a fast open socket is
476 * already accepted it is treated as a connected one below.
478 if (fastopen && !fastopen->sk)
481 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
483 if (!sock_owned_by_user(sk)) {
485 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
489 sk->sk_err_soft = err;
494 /* check if this ICMP message allows revert of backoff.
497 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
498 code == ICMPV6_NOROUTE)
499 tcp_ld_RTO_revert(sk, seq);
/* Deliver the error to userspace only if IPV6_RECVERR is enabled. */
502 if (!sock_owned_by_user(sk) && np->recverr) {
506 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for @req: route the reply (unless @dst is
 * provided), construct the segment, checksum it, and send via ip6_xmit()
 * with a traffic class possibly reflected from the incoming SYN.
 */
515 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
517 struct request_sock *req,
518 struct tcp_fastopen_cookie *foc,
519 enum tcp_synack_type synack_type,
520 struct sk_buff *syn_skb)
522 struct inet_request_sock *ireq = inet_rsk(req);
523 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
524 struct ipv6_txoptions *opt;
525 struct flowi6 *fl6 = &fl->u.ip6;
530 /* First, grab a route. */
531 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
532 IPPROTO_TCP)) == NULL)
535 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
538 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
539 &ireq->ir_v6_rmt_addr);
541 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the peer's flow label when IPV6_FLOWINFO reflection is on. */
542 if (np->repflow && ireq->pktopts)
543 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* tcp_reflect_tos: echo the SYN's DSCP bits, keeping our own ECN bits. */
545 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
546 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
547 (np->tclass & INET_ECN_MASK) :
550 if (!INET_ECN_is_capable(tclass) &&
551 tcp_bpf_ca_needs_ecn((struct sock *)req))
552 tclass |= INET_ECN_ECT_0;
/* Prefer per-request IPv6 options over the listener's (RCU) options. */
555 opt = ireq->ipv6_opt;
557 opt = rcu_dereference(np->opt);
558 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
559 tclass, sk->sk_priority);
561 err = net_xmit_eval(err);
/* Free per-request IPv6 state: duplicated tx options and the saved SYN
 * skb (pktopts), if any. kfree/kfree_skb both accept NULL.
 */
569 static void tcp_v6_reqsk_destructor(struct request_sock *req)
571 kfree(inet_rsk(req)->ipv6_opt);
572 kfree_skb(inet_rsk(req)->pktopts);
575 #ifdef CONFIG_TCP_MD5SIG
/* Look up an MD5 key for an IPv6 peer address (optionally scoped by an
 * L3 master device index) via the generic tcp_md5_do_lookup().
 */
576 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
577 const struct in6_addr *addr,
580 return tcp_md5_do_lookup(sk, l3index,
581 (union tcp_md5_addr *)addr, AF_INET6);
/* tcp_sock_af_ops hook: find the MD5 key for @addr_sk's peer, using the
 * L3 master ifindex of its bound device as the VRF scope.
 */
584 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
585 const struct sock *addr_sk)
589 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
590 addr_sk->sk_bound_dev_if);
591 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the request
 * (family, prefix length, optional ifindex) and add or delete the key.
 * V4-mapped addresses are stored as AF_INET keys.
 */
595 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
596 sockptr_t optval, int optlen)
598 struct tcp_md5sig cmd;
599 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 if (optlen < sizeof(cmd))
606 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
609 if (sin6->sin6_family != AF_INET6)
/* TCP_MD5SIG_EXT may carry an explicit prefix length; a v4-mapped
 * address is limited to /32, otherwise up to /128.
 */
612 if (optname == TCP_MD5SIG_EXT &&
613 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
614 prefixlen = cmd.tcpm_prefixlen;
615 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
619 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
/* Optional device binding: the ifindex must name an L3 master (VRF). */
622 if (optname == TCP_MD5SIG_EXT &&
623 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
624 struct net_device *dev;
627 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
628 if (dev && netif_is_l3_master(dev))
629 l3index = dev->ifindex;
632 /* ok to reference set/not set outside of rcu;
633 * right now device MUST be an L3 master
635 if (!dev || !l3index)
/* Zero key length means delete the key for this address/prefix/scope. */
639 if (!cmd.tcpm_keylen) {
640 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
641 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
644 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
645 AF_INET6, prefixlen, l3index);
648 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
/* Add: v4-mapped keys are stored under AF_INET using the embedded v4
 * address (last 32 bits).
 */
651 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
652 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
653 AF_INET, prefixlen, l3index,
654 cmd.tcpm_key, cmd.tcpm_keylen,
657 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
658 AF_INET6, prefixlen, l3index,
659 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCPv6 pseudo-header plus a copy of the TCP header (with the
 * checksum field treatment done by the callers) into the MD5 hash state
 * in @hp. @nbytes is the TCP length used in the pseudo-header.
 */
662 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
663 const struct in6_addr *daddr,
664 const struct in6_addr *saddr,
665 const struct tcphdr *th, int nbytes)
667 struct tcp6_pseudohdr *bp;
668 struct scatterlist sg;
672 /* 1. TCP pseudo-header (RFC2460) */
675 bp->protocol = cpu_to_be32(IPPROTO_TCP);
676 bp->len = cpu_to_be32(nbytes);
/* TCP header is copied so it can be hashed contiguously after bp. */
678 _th = (struct tcphdr *)(bp + 1);
679 memcpy(_th, th, sizeof(*th));
682 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
683 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
684 sizeof(*bp) + sizeof(*th));
685 return crypto_ahash_update(hp->md5_req);
/* Compute the TCP-MD5 digest over pseudo-header + TCP header + key only
 * (no payload) into @md5_hash. On any crypto failure the output buffer
 * is zeroed so callers never consume a partial digest.
 */
688 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
689 const struct in6_addr *daddr, struct in6_addr *saddr,
690 const struct tcphdr *th)
692 struct tcp_md5sig_pool *hp;
693 struct ahash_request *req;
695 hp = tcp_get_md5sig_pool();
697 goto clear_hash_noput;
700 if (crypto_ahash_init(req))
702 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
704 if (tcp_md5_hash_key(hp, key))
706 ahash_request_set_crypt(req, NULL, md5_hash, 0);
707 if (crypto_ahash_final(req))
710 tcp_put_md5sig_pool();
/* Error path: release the per-cpu pool and zero the digest. */
714 tcp_put_md5sig_pool();
716 memset(md5_hash, 0, 16);
/* Compute the TCP-MD5 digest for a whole segment (pseudo-header, TCP
 * header, payload, key) into @md5_hash. Addresses come from the socket
 * when available, otherwise from the skb's IPv6 header. Zeroes the
 * output on failure.
 */
720 static int tcp_v6_md5_hash_skb(char *md5_hash,
721 const struct tcp_md5sig_key *key,
722 const struct sock *sk,
723 const struct sk_buff *skb)
725 const struct in6_addr *saddr, *daddr;
726 struct tcp_md5sig_pool *hp;
727 struct ahash_request *req;
728 const struct tcphdr *th = tcp_hdr(skb);
730 if (sk) { /* valid for establish/request sockets */
731 saddr = &sk->sk_v6_rcv_saddr;
732 daddr = &sk->sk_v6_daddr;
734 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
735 saddr = &ip6h->saddr;
736 daddr = &ip6h->daddr;
739 hp = tcp_get_md5sig_pool();
741 goto clear_hash_noput;
744 if (crypto_ahash_init(req))
747 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
/* Payload starts after the TCP header (doff words). */
749 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
751 if (tcp_md5_hash_key(hp, key))
753 ahash_request_set_crypt(req, NULL, md5_hash, 0);
754 if (crypto_ahash_final(req))
757 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest. */
761 tcp_put_md5sig_pool();
763 memset(md5_hash, 0, 16);
/* Validate the MD5 option of an inbound segment against the key we expect
 * for its source address. Returns true (drop) on any mismatch: option
 * present without a key, key without an option, or digest mismatch.
 */
769 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
770 const struct sk_buff *skb,
773 #ifdef CONFIG_TCP_MD5SIG
774 const __u8 *hash_location = NULL;
775 struct tcp_md5sig_key *hash_expected;
776 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
777 const struct tcphdr *th = tcp_hdr(skb);
778 int genhash, l3index;
781 /* sdif set, means packet ingressed via a device
782 * in an L3 domain and dif is set to the l3mdev
784 l3index = sdif ? dif : 0;
786 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
787 hash_location = tcp_parse_md5sig_option(th);
789 /* We've parsed the options - do we have a hash? */
790 if (!hash_expected && !hash_location)
793 if (hash_expected && !hash_location) {
794 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
798 if (!hash_expected && hash_location) {
799 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
803 /* check the signature */
804 genhash = tcp_v6_md5_hash_skb(newhash,
/* genhash != 0 means digest computation failed; either way treat as drop. */
808 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
809 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
810 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
811 genhash ? "failed" : "mismatch",
812 &ip6h->saddr, ntohs(th->source),
813 &ip6h->daddr, ntohs(th->dest), l3index);
/* Initialize the IPv6 parts of a freshly minted request sock from the
 * incoming SYN: remote/local addresses, incoming interface for link-local
 * peers, and (when needed for cmsg delivery) a reference to the SYN skb.
 */
820 static void tcp_v6_init_req(struct request_sock *req,
821 const struct sock *sk_listener,
824 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
825 struct inet_request_sock *ireq = inet_rsk(req);
826 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
828 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
829 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
831 /* So that link locals have meaning */
832 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
833 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
834 ireq->ir_iif = tcp_v6_iif(skb);
/* Keep the SYN skb alive if the listener wants packet-info cmsgs or
 * accepts IPv6 options from it; the ref is dropped in the destructor.
 */
836 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
837 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
838 np->rxopt.bits.rxinfo ||
839 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
840 np->rxopt.bits.rxohlim || np->repflow)) {
841 refcount_inc(&skb->users);
/* route_req hook: populate the request sock, run the LSM hook, then
 * resolve a route for the SYN-ACK.
 */
846 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
849 struct request_sock *req)
851 tcp_v6_init_req(req, sk, skb);
853 if (security_inet_conn_request(sk, skb, req))
856 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* request_sock_ops for IPv6 TCP: wires SYN-ACK retransmit, ACK/RST
 * emission and request-sock teardown to the v6 implementations above.
 */
859 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
861 .obj_size = sizeof(struct tcp6_request_sock),
862 .rtx_syn_ack = tcp_rtx_synack,
863 .send_ack = tcp_v6_reqsk_send_ack,
864 .destructor = tcp_v6_reqsk_destructor,
865 .send_reset = tcp_v6_send_reset,
866 .syn_ack_timeout = tcp_syn_ack_timeout,
/* TCP-specific request-sock ops for IPv6: MSS clamp for the minimum IPv6
 * MTU, MD5 and syncookie hooks (config-dependent), routing/ISN helpers
 * and the SYN-ACK transmit path.
 */
869 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
870 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
871 sizeof(struct ipv6hdr),
872 #ifdef CONFIG_TCP_MD5SIG
873 .req_md5_lookup = tcp_v6_md5_lookup,
874 .calc_md5_hash = tcp_v6_md5_hash_skb,
876 #ifdef CONFIG_SYN_COOKIES
877 .cookie_init_seq = cookie_v6_init_sequence,
879 .route_req = tcp_v6_route_req,
880 .init_seq = tcp_v6_init_seq,
881 .init_ts_off = tcp_v6_init_ts_off,
882 .send_synack = tcp_v6_send_synack,
/* Build and send a bare control segment (RST when @rst, otherwise ACK)
 * in reply to @skb, using the per-netns control socket. Optionally
 * includes timestamp, MD5 and MPTCP-reset options; source/destination
 * are the incoming packet's swapped.
 * NOTE(review): excerpt is missing some lines (early returns/braces);
 * comments describe visible statements only.
 */
885 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
886 u32 ack, u32 win, u32 tsval, u32 tsecr,
887 int oif, struct tcp_md5sig_key *key, int rst,
888 u8 tclass, __be32 label, u32 priority)
890 const struct tcphdr *th = tcp_hdr(skb);
892 struct sk_buff *buff;
894 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
895 struct sock *ctl_sk = net->ipv6.tcp_sk;
896 unsigned int tot_len = sizeof(struct tcphdr);
897 __be32 mrst = 0, *topt;
898 struct dst_entry *dst;
/* Grow the header length for each option we will emit. */
902 tot_len += TCPOLEN_TSTAMP_ALIGNED;
903 #ifdef CONFIG_TCP_MD5SIG
905 tot_len += TCPOLEN_MD5SIG_ALIGNED;
910 mrst = mptcp_reset_option(skb);
913 tot_len += sizeof(__be32);
917 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
922 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
924 t1 = skb_push(buff, tot_len);
925 skb_reset_transport_header(buff);
927 /* Swap the send and the receive. */
928 memset(t1, 0, sizeof(*t1));
929 t1->dest = th->source;
930 t1->source = th->dest;
931 t1->doff = tot_len / 4;
932 t1->seq = htonl(seq);
933 t1->ack_seq = htonl(ack);
/* RFC 793: an RST in reply to an ACK carries no ACK bit itself. */
934 t1->ack = !rst || !th->ack;
936 t1->window = htons(win);
938 topt = (__be32 *)(t1 + 1);
941 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
942 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
943 *topt++ = htonl(tsval);
944 *topt++ = htonl(tsecr);
950 #ifdef CONFIG_TCP_MD5SIG
952 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
953 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* Sign the reply keyed to the incoming packet's address pair. */
954 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
955 &ipv6_hdr(skb)->saddr,
956 &ipv6_hdr(skb)->daddr, t1);
960 memset(&fl6, 0, sizeof(fl6));
961 fl6.daddr = ipv6_hdr(skb)->saddr;
962 fl6.saddr = ipv6_hdr(skb)->daddr;
963 fl6.flowlabel = label;
965 buff->ip_summed = CHECKSUM_PARTIAL;
968 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
970 fl6.flowi6_proto = IPPROTO_TCP;
/* Link-local replies need an explicit egress interface. */
971 if (rt6_need_strict(&fl6.daddr) && !oif)
972 fl6.flowi6_oif = tcp_v6_iif(skb);
974 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
977 fl6.flowi6_oif = oif;
/* TIME_WAIT sockets carry their own mark/txhash for the reply. */
981 if (sk->sk_state == TCP_TIME_WAIT) {
982 mark = inet_twsk(sk)->tw_mark;
983 /* autoflowlabel relies on buff->hash */
984 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
989 buff->tstamp = tcp_transmit_time(sk);
991 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
992 fl6.fl6_dport = t1->dest;
993 fl6.fl6_sport = t1->source;
994 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
995 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
997 /* Pass a socket to ip6_dst_lookup either it is for RST
998 * Underlying function will use this to retrieve the network
1001 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
1003 skb_dst_set(buff, dst);
1004 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1005 tclass & ~INET_ECN_MASK, priority);
1006 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1008 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb. With MD5 enabled, find the signing key:
 * from the full socket when we have one, else by looking up a listener
 * matching the packet (so an RST to a stray signed segment is itself
 * signed, and unsigned segments claiming MD5 don't elicit an RST).
 * Sequence numbers follow RFC 793: echo the ACK as our SEQ, or ACK the
 * received data when the segment had no ACK.
 */
1015 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1017 const struct tcphdr *th = tcp_hdr(skb);
1018 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1019 u32 seq = 0, ack_seq = 0;
1020 struct tcp_md5sig_key *key = NULL;
1021 #ifdef CONFIG_TCP_MD5SIG
1022 const __u8 *hash_location = NULL;
1023 unsigned char newhash[16];
1025 struct sock *sk1 = NULL;
1035 /* If sk not NULL, it means we did a successful lookup and incoming
1036 * route had to be correct. prequeue might have dropped our dst.
1038 if (!sk && !ipv6_unicast_destination(skb))
1041 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1042 #ifdef CONFIG_TCP_MD5SIG
1044 hash_location = tcp_parse_md5sig_option(th);
1045 if (sk && sk_fullsock(sk)) {
1048 /* sdif set, means packet ingressed via a device
1049 * in an L3 domain and inet_iif is set to it.
1051 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1052 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1053 } else if (hash_location) {
1054 int dif = tcp_v6_iif_l3_slave(skb);
1055 int sdif = tcp_v6_sdif(skb);
1059 * active side is lost. Try to find listening socket through
1060 * source port, and then find md5 key through listening socket.
1061 * we are not loose security here:
1062 * Incoming packet is checked with md5 hash with finding key,
1063 * no RST generated if md5 hash doesn't match.
1065 sk1 = inet6_lookup_listener(net,
1066 &tcp_hashinfo, NULL, 0,
1068 th->source, &ipv6h->daddr,
1069 ntohs(th->source), dif, sdif);
1073 /* sdif set, means packet ingressed via a device
1074 * in an L3 domain and dif is set to it.
1076 l3index = tcp_v6_sdif(skb) ? dif : 0;
1078 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
/* Only reply if the claimed MD5 digest actually verifies. */
1082 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1083 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1089 seq = ntohl(th->ack_seq);
1091 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1095 oif = sk->sk_bound_dev_if;
1096 if (sk_fullsock(sk)) {
1097 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1099 trace_tcp_send_reset(sk, skb);
1101 label = ip6_flowlabel(ipv6h);
1102 priority = sk->sk_priority;
1104 if (sk->sk_state == TCP_TIME_WAIT) {
1105 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1106 priority = inet_twsk(sk)->tw_priority;
/* Without a socket, reflect the flow label only if the netns allows it. */
1109 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1110 label = ip6_flowlabel(ipv6h);
1113 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1114 ipv6_get_dsfield(ipv6h), label, priority);
1116 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper around tcp_v6_send_response() with rst=0: emit a bare ACK
 * (used for TIME_WAIT and request-sock acknowledgments).
 */
1122 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1123 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1124 struct tcp_md5sig_key *key, u8 tclass,
1125 __be32 label, u32 priority)
1127 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1128 tclass, label, priority);
/* ACK on behalf of a TIME_WAIT socket, using the state preserved in the
 * timewait sock: snd_nxt/rcv_nxt, scaled window, timestamp offset and
 * recent TS, MD5 key, tclass, flow label and priority.
 */
1131 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1133 struct inet_timewait_sock *tw = inet_twsk(sk);
1134 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1136 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1137 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1138 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1139 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1140 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
/* ACK on behalf of a request sock (SYN_RECV). Sequence numbers depend on
 * whether @sk is the listener (regular SYN_RECV) or an accepted Fast Open
 * socket; the advertised window is scaled per RFC 7323.
 */
1145 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1146 struct request_sock *req)
1150 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1152 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1153 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1156 * The window field (SEG.WND) of every outgoing segment, with the
1157 * exception of <SYN> segments, MUST be right-shifted by
1158 * Rcv.Wind.Shift bits:
1160 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1161 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1162 tcp_rsk(req)->rcv_nxt,
1163 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1164 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1165 req->ts_recent, sk->sk_bound_dev_if,
1166 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1167 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
/* When SYN cookies are compiled in, validate a cookie-bearing ACK and
 * (on success) create the corresponding child socket.
 */
1171 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1173 #ifdef CONFIG_SYN_COOKIES
1174 const struct tcphdr *th = tcp_hdr(skb);
1177 sk = cookie_v6_check(sk, skb);
/* Generate a SYN-cookie ISN for BPF synproxy-style users: compute the
 * cookie MSS and the cookie value, and record listener overflow. Returns
 * the MSS (visible fragment suggests 0 when cookies are unavailable —
 * TODO confirm, surrounding lines are not shown).
 */
1182 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1183 struct tcphdr *th, u32 *cookie)
1186 #ifdef CONFIG_SYN_COOKIES
1187 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1188 &tcp_request_sock_ipv6_ops, sk, th);
1190 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1191 tcp_synq_overflow(sk);
/* Entry point for an incoming SYN: v4 frames go to tcp_v4_conn_request(),
 * non-unicast destinations and v4-mapped sources are rejected (the latter
 * counted as header errors), everything else goes through the generic
 * tcp_conn_request() with the IPv6 ops tables.
 */
1197 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1199 if (skb->protocol == htons(ETH_P_IP))
1200 return tcp_v4_conn_request(sk, skb);
1202 if (!ipv6_unicast_destination(skb))
1205 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1206 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1210 return tcp_conn_request(&tcp6_request_sock_ops,
1211 &tcp_request_sock_ipv6_ops, sk, skb);
1215 return 0; /* don't send reset */
/* Restore the IP6CB layout in skb->cb from TCP's private control block. */
1218 static void tcp_v6_restore_cb(struct sk_buff *skb)
1220 /* We need to move header back to the beginning if xfrm6_policy_check()
1221 * and tcp_v6_fill_cb() are going to be called again.
1222 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1224 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1225 sizeof(struct inet6_skb_parm));
1228 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1229 struct request_sock *req,
1230 struct dst_entry *dst,
1231 struct request_sock *req_unhash,
1234 struct inet_request_sock *ireq;
1235 struct ipv6_pinfo *newnp;
1236 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1237 struct ipv6_txoptions *opt;
1238 struct inet_sock *newinet;
1239 bool found_dup_sk = false;
1240 struct tcp_sock *newtp;
1242 #ifdef CONFIG_TCP_MD5SIG
1243 struct tcp_md5sig_key *key;
1248 if (skb->protocol == htons(ETH_P_IP)) {
1253 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1254 req_unhash, own_req);
1259 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1261 newinet = inet_sk(newsk);
1262 newnp = tcp_inet6_sk(newsk);
1263 newtp = tcp_sk(newsk);
1265 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1267 newnp->saddr = newsk->sk_v6_rcv_saddr;
1269 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1270 if (sk_is_mptcp(newsk))
1271 mptcpv6_handle_mapped(newsk, true);
1272 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1273 #ifdef CONFIG_TCP_MD5SIG
1274 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1277 newnp->ipv6_mc_list = NULL;
1278 newnp->ipv6_ac_list = NULL;
1279 newnp->ipv6_fl_list = NULL;
1280 newnp->pktoptions = NULL;
1282 newnp->mcast_oif = inet_iif(skb);
1283 newnp->mcast_hops = ip_hdr(skb)->ttl;
1284 newnp->rcv_flowinfo = 0;
1286 newnp->flow_label = 0;
1289 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1290 * here, tcp_create_openreq_child now does this for us, see the comment in
1291 * that function for the gory details. -acme
1294 /* It is tricky place. Until this moment IPv4 tcp
1295 worked with IPv6 icsk.icsk_af_ops.
1298 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1303 ireq = inet_rsk(req);
1305 if (sk_acceptq_is_full(sk))
1309 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1314 newsk = tcp_create_openreq_child(sk, req, skb);
1319 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1320 * count here, tcp_create_openreq_child now does this for us, see the
1321 * comment in that function for the gory details. -acme
1324 newsk->sk_gso_type = SKB_GSO_TCPV6;
1325 ip6_dst_store(newsk, dst, NULL, NULL);
1326 inet6_sk_rx_dst_set(newsk, skb);
1328 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1330 newtp = tcp_sk(newsk);
1331 newinet = inet_sk(newsk);
1332 newnp = tcp_inet6_sk(newsk);
1334 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1336 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1337 newnp->saddr = ireq->ir_v6_loc_addr;
1338 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1339 newsk->sk_bound_dev_if = ireq->ir_iif;
1341 /* Now IPv6 options...
1343 First: no IPv4 options.
1345 newinet->inet_opt = NULL;
1346 newnp->ipv6_mc_list = NULL;
1347 newnp->ipv6_ac_list = NULL;
1348 newnp->ipv6_fl_list = NULL;
1351 newnp->rxopt.all = np->rxopt.all;
1353 newnp->pktoptions = NULL;
1355 newnp->mcast_oif = tcp_v6_iif(skb);
1356 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1357 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1359 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1361 /* Set ToS of the new socket based upon the value of incoming SYN.
1362 * ECT bits are set later in tcp_init_transfer().
1364 if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1365 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1367 /* Clone native IPv6 options from listening socket (if any)
1369 Yes, keeping reference count would be much more clever,
1370 but we do one more thing here: reattach optmem
1373 opt = ireq->ipv6_opt;
1375 opt = rcu_dereference(np->opt);
1377 opt = ipv6_dup_options(newsk, opt);
1378 RCU_INIT_POINTER(newnp->opt, opt);
1380 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1382 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1385 tcp_ca_openreq_child(newsk, dst);
1387 tcp_sync_mss(newsk, dst_mtu(dst));
1388 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1390 tcp_initialize_rcv_mss(newsk);
1392 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1393 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1395 #ifdef CONFIG_TCP_MD5SIG
1396 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1398 /* Copy over the MD5 key from the original socket */
1399 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1401 /* We're using one, so create a matching key
1402 * on the newsk structure. If we fail to get
1403 * memory, then we end up not copying the key
1406 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1407 AF_INET6, 128, l3index, key->key, key->keylen,
1408 sk_gfp_mask(sk, GFP_ATOMIC));
1412 if (__inet_inherit_port(sk, newsk) < 0) {
1413 inet_csk_prepare_forced_close(newsk);
1417 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1420 tcp_move_syn(newtp, req);
1422 /* Clone pktoptions received with SYN, if we own the req */
1423 if (ireq->pktopts) {
1424 newnp->pktoptions = skb_clone(ireq->pktopts,
1425 sk_gfp_mask(sk, GFP_ATOMIC));
1426 consume_skb(ireq->pktopts);
1427 ireq->pktopts = NULL;
1428 if (newnp->pktoptions) {
1429 tcp_v6_restore_cb(newnp->pktoptions);
1430 skb_set_owner_r(newnp->pktoptions, newsk);
1434 if (!req_unhash && found_dup_sk) {
1435 /* This code path should only be executed in the
1436 * syncookie case only
1438 bh_unlock_sock(newsk);
1447 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1455 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1457 /* The socket must have its spinlock held when we get
1458 * here, unless it is a TCP_LISTEN socket.
1460 * We have a potential double-lock case here, so even when
1461 * doing backlog processing we use the BH locking scheme.
1462 * This is because we cannot sleep with the original spinlock
1465 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1467 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1468 struct sk_buff *opt_skb = NULL;
1469 struct tcp_sock *tp;
1471 /* Imagine: socket is IPv6. IPv4 packet arrives,
1472 goes to IPv4 receive handler and backlogged.
1473 From backlog it always goes here. Kerboom...
1474 Fortunately, tcp_rcv_established and rcv_established
1475 handle them correctly, but it is not case with
1476 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1479 if (skb->protocol == htons(ETH_P_IP))
1480 return tcp_v4_do_rcv(sk, skb);
1483 * socket locking is here for SMP purposes as backlog rcv
1484 * is currently called with bh processing disabled.
1487 /* Do Stevens' IPV6_PKTOPTIONS.
1489 Yes, guys, it is the only place in our code, where we
1490 may make it not affecting IPv4.
1491 The rest of code is protocol independent,
1492 and I do not like idea to uglify IPv4.
1494 Actually, all the idea behind IPV6_PKTOPTIONS
1495 looks not very well thought. For now we latch
1496 options, received in the last packet, enqueued
1497 by tcp. Feel free to propose better solution.
1501 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1503 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1504 struct dst_entry *dst = sk->sk_rx_dst;
1506 sock_rps_save_rxhash(sk, skb);
1507 sk_mark_napi_id(sk, skb);
1509 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1510 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1511 dst, np->rx_dst_cookie) == NULL) {
1513 sk->sk_rx_dst = NULL;
1517 tcp_rcv_established(sk, skb);
1519 goto ipv6_pktoptions;
1523 if (tcp_checksum_complete(skb))
1526 if (sk->sk_state == TCP_LISTEN) {
1527 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1533 if (tcp_child_process(sk, nsk, skb))
1536 __kfree_skb(opt_skb);
1540 sock_rps_save_rxhash(sk, skb);
1542 if (tcp_rcv_state_process(sk, skb))
1545 goto ipv6_pktoptions;
1549 tcp_v6_send_reset(sk, skb);
1552 __kfree_skb(opt_skb);
1556 trace_tcp_bad_csum(skb);
1557 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1558 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1563 /* Do you ask, what is it?
1565 1. skb was enqueued by tcp.
1566 2. skb is added to tail of read queue, rather than out of order.
1567 3. socket is not in passive state.
1568 4. Finally, it really contains options, which user wants to receive.
1571 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1572 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1573 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1574 np->mcast_oif = tcp_v6_iif(opt_skb);
1575 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1576 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1577 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1578 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1580 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1581 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1582 skb_set_owner_r(opt_skb, sk);
1583 tcp_v6_restore_cb(opt_skb);
1584 opt_skb = xchg(&np->pktoptions, opt_skb);
1586 __kfree_skb(opt_skb);
1587 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_fill_cb() - populate TCP_SKB_CB() from the IPv6 and TCP headers.
 * Must run only after xfrm6_policy_check() (see the comment below about
 * IP6CB aliasing).
 */
1595 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1596 const struct tcphdr *th)
1598 /* This is tricky: we move IP6CB at its correct location into
1599 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1600 * _decode_session6() uses IP6CB().
1601 * barrier() makes sure compiler won't play aliasing games.
1603 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1604 sizeof(struct inet6_skb_parm));
1607 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* SYN and FIN each consume one sequence number, hence th->syn + th->fin. */
1608 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1609 skb->len - th->doff*4);
1610 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1611 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1612 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1613 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1614 TCP_SKB_CB(skb)->sacked = 0;
/* Remember whether any (sw or hw) rx timestamp was taken for this skb. */
1615 TCP_SKB_CB(skb)->has_rxtstamp =
1616 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/*
 * tcp_v6_rcv() - top-level IPv6 TCP input handler (tcpv6_protocol.handler).
 *
 * Validates the TCP header and checksum, looks the segment up in the
 * socket tables, then handles the TCP_NEW_SYN_RECV and TCP_TIME_WAIT
 * mini-socket states inline before delivering to tcp_v6_do_rcv() —
 * directly for listeners, under the socket spinlock (or via the backlog)
 * otherwise.
 * NOTE(review): many interior lines (labels such as discard_it /
 * no_tcp_socket / do_time_wait, braces, gotos) are not visible in this
 * chunk; comments annotate only the visible statements.
 */
1619 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1621 struct sk_buff *skb_to_free;
1622 int sdif = inet6_sdif(skb);
1623 int dif = inet6_iif(skb);
1624 const struct tcphdr *th;
1625 const struct ipv6hdr *hdr;
1629 struct net *net = dev_net(skb->dev);
1631 if (skb->pkt_type != PACKET_HOST)
1635 * Count it even if it's bad.
1637 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1639 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1642 th = (const struct tcphdr *)skb->data;
/* Data offset smaller than the minimum TCP header is malformed. */
1644 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1646 if (!pskb_may_pull(skb, th->doff*4))
1649 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read header pointers: pskb_may_pull() may have moved skb->data. */
1652 th = (const struct tcphdr *)skb->data;
1653 hdr = ipv6_hdr(skb);
1656 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1657 th->source, th->dest, inet6_iif(skb), sdif,
1663 if (sk->sk_state == TCP_TIME_WAIT)
/* Request-socket path: validate against the listener before
 * completing the 3WHS via tcp_check_req(). */
1666 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1667 struct request_sock *req = inet_reqsk(sk);
1668 bool req_stolen = false;
1671 sk = req->rsk_listener;
1672 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1673 sk_drops_add(sk, skb);
1677 if (tcp_checksum_complete(skb)) {
/* Listener may have been migrated/closed; try reuseport migration,
 * otherwise drop the request. */
1681 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1682 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1684 inet_csk_reqsk_queue_drop_and_put(sk, req);
1688 /* reuseport_migrate_sock() has already held one sk_refcnt
1696 if (!tcp_filter(sk, skb)) {
1697 th = (const struct tcphdr *)skb->data;
1698 hdr = ipv6_hdr(skb);
1699 tcp_v6_fill_cb(skb, hdr, th);
1700 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1705 /* Another cpu got exclusive access to req
1706 * and created a full blown socket.
1707 * Try to feed this packet to this socket
1708 * instead of discarding it.
1710 tcp_v6_restore_cb(skb);
1714 goto discard_and_relse;
1718 tcp_v6_restore_cb(skb);
1719 } else if (tcp_child_process(sk, nsk, skb)) {
1720 tcp_v6_send_reset(nsk, skb);
1721 goto discard_and_relse;
/* Equivalent of IPv4's IP_MINTTL: drop if hop limit is below the
 * socket's configured minimum. */
1727 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1728 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1729 goto discard_and_relse;
1732 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1733 goto discard_and_relse;
1735 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1736 goto discard_and_relse;
1738 if (tcp_filter(sk, skb))
1739 goto discard_and_relse;
1740 th = (const struct tcphdr *)skb->data;
1741 hdr = ipv6_hdr(skb);
1742 tcp_v6_fill_cb(skb, hdr, th);
/* Listeners are handled locklessly. */
1746 if (sk->sk_state == TCP_LISTEN) {
1747 ret = tcp_v6_do_rcv(sk, skb);
1748 goto put_and_return;
1751 sk_incoming_cpu_update(sk);
1753 bh_lock_sock_nested(sk);
1754 tcp_segs_in(tcp_sk(sk), skb);
/* Process directly when the socket is not owned by user context;
 * otherwise queue to the backlog. */
1756 if (!sock_owned_by_user(sk)) {
1757 skb_to_free = sk->sk_rx_skb_cache;
1758 sk->sk_rx_skb_cache = NULL;
1759 ret = tcp_v6_do_rcv(sk, skb);
1761 if (tcp_add_backlog(sk, skb))
1762 goto discard_and_relse;
1767 __kfree_skb(skb_to_free);
1771 return ret ? -1 : 0;
1774 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1777 tcp_v6_fill_cb(skb, hdr, th);
1779 if (tcp_checksum_complete(skb)) {
1781 trace_tcp_bad_csum(skb);
1782 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1784 __TCP_INC_STATS(net, TCP_MIB_INERRS);
/* No matching socket: answer with a RST. */
1786 tcp_v6_send_reset(NULL, skb);
1794 sk_drops_add(sk, skb);
/* TIME_WAIT handling starts here. */
1800 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801 inet_twsk_put(inet_twsk(sk));
1805 tcp_v6_fill_cb(skb, hdr, th);
1807 if (tcp_checksum_complete(skb)) {
1808 inet_twsk_put(inet_twsk(sk));
1812 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN: a new SYN may legitimately reopen the connection;
 * find a listener and retry delivery. */
1817 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1818 skb, __tcp_hdrlen(th),
1819 &ipv6_hdr(skb)->saddr, th->source,
1820 &ipv6_hdr(skb)->daddr,
1822 tcp_v6_iif_l3_slave(skb),
1825 struct inet_timewait_sock *tw = inet_twsk(sk);
1826 inet_twsk_deschedule_put(tw);
1828 tcp_v6_restore_cb(skb);
1836 tcp_v6_timewait_ack(sk, skb);
1839 tcp_v6_send_reset(sk, skb);
1840 inet_twsk_deschedule_put(inet_twsk(sk));
1842 case TCP_TW_SUCCESS:
/*
 * tcp_v6_early_demux() - early demultiplex on IPv6 input.
 * Looks up an established socket by the segment's 5-tuple and, when
 * found, attaches the socket and its still-valid cached rx dst to the
 * skb so the later input path can skip route/socket lookup.
 */
1848 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1850 const struct ipv6hdr *hdr;
1851 const struct tcphdr *th;
1854 if (skb->pkt_type != PACKET_HOST)
1857 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1860 hdr = ipv6_hdr(skb);
1863 if (th->doff < sizeof(struct tcphdr) / 4)
1866 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1867 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1868 &hdr->saddr, th->source,
1869 &hdr->daddr, ntohs(th->dest),
1870 inet6_iif(skb), inet6_sdif(skb));
1873 skb->destructor = sock_edemux;
1874 if (sk_fullsock(sk)) {
1875 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* Validate the cached dst before use; only attach it (without a
 * reference) if the incoming interface matches. */
1878 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1880 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1881 skb_dst_set_noref(skb, dst);
/* TIME_WAIT mini-socket operations for TCPv6 (hooked via tcpv6_prot.twsk_prot). */
1886 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1887 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1888 .twsk_unique = tcp_twsk_unique,
1889 .twsk_destructor = tcp_twsk_destructor,
/* Compute/offload the TCP checksum for an outgoing segment using the
 * socket's IPv6 source and destination addresses. */
1892 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1894 struct ipv6_pinfo *np = inet6_sk(sk);
1896 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
/* AF-specific connection-socket operations for native IPv6 TCP sockets. */
1899 const struct inet_connection_sock_af_ops ipv6_specific = {
1900 .queue_xmit = inet6_csk_xmit,
1901 .send_check = tcp_v6_send_check,
1902 .rebuild_header = inet6_sk_rebuild_header,
1903 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1904 .conn_request = tcp_v6_conn_request,
1905 .syn_recv_sock = tcp_v6_syn_recv_sock,
1906 .net_header_len = sizeof(struct ipv6hdr),
1907 .net_frag_header_len = sizeof(struct frag_hdr),
1908 .setsockopt = ipv6_setsockopt,
1909 .getsockopt = ipv6_getsockopt,
1910 .addr2sockaddr = inet6_csk_addr2sockaddr,
1911 .sockaddr_len = sizeof(struct sockaddr_in6),
1912 .mtu_reduced = tcp_v6_mtu_reduced,
1915 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) operations for native IPv6 sockets. */
1916 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1917 .md5_lookup = tcp_v6_md5_lookup,
1918 .calc_md5_hash = tcp_v6_md5_hash_skb,
1919 .md5_parse = tcp_v6_parse_md5_keys,
1924 * TCP over IPv4 via INET6 API
/* AF ops for v4-mapped sockets: IPv4 transmit/header handling, but
 * IPv6 sockopt and sockaddr semantics. */
1926 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1927 .queue_xmit = ip_queue_xmit,
1928 .send_check = tcp_v4_send_check,
1929 .rebuild_header = inet_sk_rebuild_header,
1930 .sk_rx_dst_set = inet_sk_rx_dst_set,
1931 .conn_request = tcp_v6_conn_request,
1932 .syn_recv_sock = tcp_v6_syn_recv_sock,
1933 .net_header_len = sizeof(struct iphdr),
1934 .setsockopt = ipv6_setsockopt,
1935 .getsockopt = ipv6_getsockopt,
1936 .addr2sockaddr = inet6_csk_addr2sockaddr,
1937 .sockaddr_len = sizeof(struct sockaddr_in6),
1938 .mtu_reduced = tcp_v4_mtu_reduced,
1941 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 operations for v4-mapped sockets: hash over IPv4 addresses,
 * key parsing via the IPv6 sockopt path. */
1942 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1943 .md5_lookup = tcp_v4_md5_lookup,
1944 .calc_md5_hash = tcp_v4_md5_hash_skb,
1945 .md5_parse = tcp_v6_parse_md5_keys,
1949 /* NOTE: A lot of things set to zero explicitly by call to
1950 * sk_alloc() so need not be done here.
/* Per-socket init for AF_INET6 TCP: install the IPv6 AF ops (and MD5
 * ops when configured).  Common TCP init is presumably done elsewhere
 * in the invisible part of this function — confirm against full source. */
1952 static int tcp_v6_init_sock(struct sock *sk)
1954 struct inet_connection_sock *icsk = inet_csk(sk);
1958 icsk->icsk_af_ops = &ipv6_specific;
1960 #ifdef CONFIG_TCP_MD5SIG
1961 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Teardown: run the shared IPv4/TCP destructor, then IPv6-specific cleanup. */
1967 static void tcp_v6_destroy_sock(struct sock *sk)
1969 tcp_v4_destroy_sock(sk);
1970 inet6_destroy_sock(sk);
1973 #ifdef CONFIG_PROC_FS
1974 /* Proc filesystem TCPv6 sock list dumping. */
/* Format one SYN_RECV request socket as a /proc/net/tcp6 line. */
1975 static void get_openreq6(struct seq_file *seq,
1976 const struct request_sock *req, int i)
/* Remaining time until the SYN-ACK retransmit timer fires. */
1978 long ttd = req->rsk_timer.expires - jiffies;
1979 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1980 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1986 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1987 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1989 src->s6_addr32[0], src->s6_addr32[1],
1990 src->s6_addr32[2], src->s6_addr32[3],
1991 inet_rsk(req)->ir_num,
1992 dest->s6_addr32[0], dest->s6_addr32[1],
1993 dest->s6_addr32[2], dest->s6_addr32[3],
1994 ntohs(inet_rsk(req)->ir_rmt_port),
1996 0, 0, /* could print option size, but that is af dependent. */
1997 1, /* timers active (only the expire timer) */
1998 jiffies_to_clock_t(ttd),
2000 from_kuid_munged(seq_user_ns(seq),
2001 sock_i_uid(req->rsk_listener)),
2002 0, /* non standard timer */
2003 0, /* open_requests have no inode */
/* Format one full TCP socket as a /proc/net/tcp6 line (addresses, state,
 * queues, timers, uid, refcount, congestion info). */
2007 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2009 const struct in6_addr *dest, *src;
2012 unsigned long timer_expires;
2013 const struct inet_sock *inet = inet_sk(sp);
2014 const struct tcp_sock *tp = tcp_sk(sp);
2015 const struct inet_connection_sock *icsk = inet_csk(sp);
2016 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2020 dest = &sp->sk_v6_daddr;
2021 src = &sp->sk_v6_rcv_saddr;
2022 destp = ntohs(inet->inet_dport);
2023 srcp = ntohs(inet->inet_sport);
/* Pick the most relevant pending timer for the "tm->when" column:
 * retransmit/probe/keepalive, else no timer. */
2025 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2026 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2027 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2029 timer_expires = icsk->icsk_timeout;
2030 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2032 timer_expires = icsk->icsk_timeout;
2033 } else if (timer_pending(&sp->sk_timer)) {
2035 timer_expires = sp->sk_timer.expires;
2038 timer_expires = jiffies;
2041 state = inet_sk_state_load(sp);
2042 if (state == TCP_LISTEN)
2043 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2045 /* Because we don't lock the socket,
2046 * we might find a transient negative value.
2048 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2049 READ_ONCE(tp->copied_seq), 0);
2052 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2053 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2055 src->s6_addr32[0], src->s6_addr32[1],
2056 src->s6_addr32[2], src->s6_addr32[3], srcp,
2057 dest->s6_addr32[0], dest->s6_addr32[1],
2058 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2060 READ_ONCE(tp->write_seq) - tp->snd_una,
2063 jiffies_delta_to_clock_t(timer_expires - jiffies),
2064 icsk->icsk_retransmits,
2065 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2066 icsk->icsk_probes_out,
2068 refcount_read(&sp->sk_refcnt), sp,
2069 jiffies_to_clock_t(icsk->icsk_rto),
2070 jiffies_to_clock_t(icsk->icsk_ack.ato),
2071 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
/* Last column: listener fastopen queue depth, or ssthresh
 * (-1 while still in initial slow start). */
2073 state == TCP_LISTEN ?
2074 fastopenq->max_qlen :
2075 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Format one TIME_WAIT mini-socket as a /proc/net/tcp6 line. */
2079 static void get_timewait6_sock(struct seq_file *seq,
2080 struct inet_timewait_sock *tw, int i)
/* Time remaining on the TIME_WAIT timer. */
2082 long delta = tw->tw_timer.expires - jiffies;
2083 const struct in6_addr *dest, *src;
2086 dest = &tw->tw_v6_daddr;
2087 src = &tw->tw_v6_rcv_saddr;
2088 destp = ntohs(tw->tw_dport);
2089 srcp = ntohs(tw->tw_sport);
2092 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2093 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2095 src->s6_addr32[0], src->s6_addr32[1],
2096 src->s6_addr32[2], src->s6_addr32[3], srcp,
2097 dest->s6_addr32[0], dest->s6_addr32[1],
2098 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2099 tw->tw_substate, 0, 0,
2100 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2101 refcount_read(&tw->tw_refcnt), tw);
/* seq_file .show callback for /proc/net/tcp6: print the header row for
 * the start token, otherwise dispatch on socket state to the matching
 * line formatter. */
2104 static int tcp6_seq_show(struct seq_file *seq, void *v)
2106 struct tcp_iter_state *st;
2107 struct sock *sk = v;
2109 if (v == SEQ_START_TOKEN) {
2114 "st tx_queue rx_queue tr tm->when retrnsmt"
2115 " uid timeout inode\n");
2120 if (sk->sk_state == TCP_TIME_WAIT)
2121 get_timewait6_sock(seq, v, st->num);
2122 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2123 get_openreq6(seq, v, st->num);
2125 get_tcp6_sock(seq, v, st->num);
/* seq_file iteration ops for /proc/net/tcp6 (iteration shared with IPv4). */
2130 static const struct seq_operations tcp6_seq_ops = {
2131 .show = tcp6_seq_show,
2132 .start = tcp_seq_start,
2133 .next = tcp_seq_next,
2134 .stop = tcp_seq_stop,
/* Per-AF iterator state; initializer fields not visible in this chunk. */
2137 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Create the per-netns /proc/net/tcp6 entry. */
2141 int __net_init tcp6_proc_init(struct net *net)
2143 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2144 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Remove the per-netns /proc/net/tcp6 entry. */
2149 void tcp6_proc_exit(struct net *net)
2151 remove_proc_entry("tcp6", net->proc_net);
/* Protocol descriptor for AF_INET6/SOCK_STREAM sockets; mostly shares the
 * generic TCP implementation, with v6-specific connect/init/destroy/rcv. */
2155 struct proto tcpv6_prot = {
2157 .owner = THIS_MODULE,
2159 .pre_connect = tcp_v6_pre_connect,
2160 .connect = tcp_v6_connect,
2161 .disconnect = tcp_disconnect,
2162 .accept = inet_csk_accept,
2164 .init = tcp_v6_init_sock,
2165 .destroy = tcp_v6_destroy_sock,
2166 .shutdown = tcp_shutdown,
2167 .setsockopt = tcp_setsockopt,
2168 .getsockopt = tcp_getsockopt,
2169 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2170 .keepalive = tcp_set_keepalive,
2171 .recvmsg = tcp_recvmsg,
2172 .sendmsg = tcp_sendmsg,
2173 .sendpage = tcp_sendpage,
2174 .backlog_rcv = tcp_v6_do_rcv,
2175 .release_cb = tcp_release_cb,
2177 .unhash = inet_unhash,
2178 .get_port = inet_csk_get_port,
2179 #ifdef CONFIG_BPF_SYSCALL
2180 .psock_update_sk_prot = tcp_bpf_update_proto,
2182 .enter_memory_pressure = tcp_enter_memory_pressure,
2183 .leave_memory_pressure = tcp_leave_memory_pressure,
2184 .stream_memory_free = tcp_stream_memory_free,
2185 .sockets_allocated = &tcp_sockets_allocated,
2186 .memory_allocated = &tcp_memory_allocated,
2187 .memory_pressure = &tcp_memory_pressure,
2188 .orphan_count = &tcp_orphan_count,
2189 .sysctl_mem = sysctl_tcp_mem,
2190 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2191 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2192 .max_header = MAX_TCP_HEADER,
2193 .obj_size = sizeof(struct tcp6_sock),
2194 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2195 .twsk_prot = &tcp6_timewait_sock_ops,
2196 .rsk_prot = &tcp6_request_sock_ops,
2197 .h.hashinfo = &tcp_hashinfo,
2198 .no_autobind = true,
2199 .diag_destroy = tcp_abort,
2201 EXPORT_SYMBOL_GPL(tcpv6_prot);
2203 /* thinking of making this const? Don't.
2204 * early_demux can change based on sysctl.
/* inet6 protocol hooks: entry points for TCP-over-IPv6 input, early
 * demux and ICMPv6 error handling. */
2206 static struct inet6_protocol tcpv6_protocol = {
2207 .early_demux = tcp_v6_early_demux,
2208 .early_demux_handler = tcp_v6_early_demux,
2209 .handler = tcp_v6_rcv,
2210 .err_handler = tcp_v6_err,
2211 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* socket() switch entry mapping SOCK_STREAM/IPPROTO_TCP on AF_INET6 to
 * tcpv6_prot and the inet6 stream ops. */
2214 static struct inet_protosw tcpv6_protosw = {
2215 .type = SOCK_STREAM,
2216 .protocol = IPPROTO_TCP,
2217 .prot = &tcpv6_prot,
2218 .ops = &inet6_stream_ops,
2219 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the kernel control socket used e.g. for sending
 * resets/acks on behalf of this namespace. */
2223 static int __net_init tcpv6_net_init(struct net *net)
2225 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2226 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns teardown: destroy the control socket created in tcpv6_net_init(). */
2229 static void __net_exit tcpv6_net_exit(struct net *net)
2231 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched netns exit: purge remaining IPv6 TIME_WAIT sockets. */
2234 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2236 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle hooks for TCPv6. */
2239 static struct pernet_operations tcpv6_net_ops = {
2240 .init = tcpv6_net_init,
2241 .exit = tcpv6_net_exit,
2242 .exit_batch = tcpv6_net_exit_batch,
/*
 * tcpv6_init() - module/boot-time registration of the TCPv6 stack:
 * inet6 protocol handler, protosw entry, pernet ops and MPTCPv6, with
 * goto-based unwind of earlier registrations on failure.
 */
2245 int __init tcpv6_init(void)
2249 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2253 /* register inet6 protocol */
2254 ret = inet6_register_protosw(&tcpv6_protosw);
2256 goto out_tcpv6_protocol;
2258 ret = register_pernet_subsys(&tcpv6_net_ops);
2260 goto out_tcpv6_protosw;
2262 ret = mptcpv6_init();
2264 goto out_tcpv6_pernet_subsys;
/* Error unwind: undo registrations in reverse order. */
2269 out_tcpv6_pernet_subsys:
2270 unregister_pernet_subsys(&tcpv6_net_ops);
2272 inet6_unregister_protosw(&tcpv6_protosw);
2274 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* Unregister everything tcpv6_init() registered, in reverse order. */
2278 void tcpv6_exit(void)
2280 unregister_pernet_subsys(&tcpv6_net_ops);
2281 inet6_unregister_protosw(&tcpv6_protosw);
2282 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);