// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
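
/* A minimal sketch of the layout assumption behind tcp_inet6_sk(): struct
 * tcp6_sock (linux/ipv6.h) keeps its ipv6_pinfo as the very last member,
 *
 *	struct tcp6_sock {
 *		struct tcp_sock   tcp;
 *		struct ipv6_pinfo inet6;	// must stay last
 *	};
 *
 * so "sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo)" is a
 * compile-time constant offset, usable instead of the inet6_sk(sk)
 * pointer chase.
 */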

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}
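
/* Both helpers feed the address pair (plus ports, for the ISN) into a
 * keyed hash (see secure_tcpv6_seq()/secure_tcpv6_ts_off() in
 * net/core/secure_seq.c), so initial sequence numbers and timestamp
 * offsets are per-flow and not predictable by an off-path attacker.
 */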

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
		struct in6_addr prev_v6_rcv_saddr;

		if (icsk->icsk_bind2_hash) {
			prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
								     sk, net, inet->inet_num);
			prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
		}
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;

		if (prev_addr_hashbucket) {
			err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
			if (err) {
				sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
				goto failure;
			}
		}
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
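
/* Note on the IPV6_ADDR_MAPPED branch above: an AF_INET6 socket connecting
 * to a v4-mapped address is handed over to the IPv4 stack. A minimal
 * userspace sketch that exercises it (illustrative only; the address is a
 * documentation example):
 *
 *	struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
 *				    .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *
 * After this, icsk_af_ops points at ipv6_mapped and traffic flows through
 * the tcp_v4_* handlers; an IPV6_V6ONLY socket instead gets -ENETUNREACH.
 */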

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}
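
/* Worked example of the clamp above: if an ICMPV6_PKT_TOOBIG reports
 * mtu = 1400 while mss_cache corresponds to a 1500-byte path, then
 * tcp_mtu_to_mss(sk, 1400) falls below mss_cache, so we resync the MSS
 * and retransmit. A bogus report "raising" the MTU to 9000 is ignored by
 * the early return, since shrinking is the only legitimate direction here.
 */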

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
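
/* Userspace reaches tcp_v6_parse_md5_keys() through setsockopt(). A minimal
 * sketch (illustrative only; the peer address and key are made up):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * TCP_MD5SIG_EXT additionally honours tcpm_flags, tcpm_prefixlen and
 * tcpm_ifindex, as parsed above.
 */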

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}
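
/* For reference, the pseudo-header hashed above mirrors the RFC 2460
 * checksum block (definition lives in net/tcp.h):
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;
 *		__be32		protocol;	// including padding
 *	};
 */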

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
#endif

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = sk->sk_mark;
		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
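
/* Option block layout built above, in emission order: an aligned TIMESTAMP
 * option (only if tsecr is set), the MPTCP reset option (only if mrst), then
 * an aligned MD5SIG option. tot_len was sized for exactly this set, so topt
 * ends precisely at t1->doff * 4 bytes from the TCP header.
 */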

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					    NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
			txhash = sk->sk_hash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
			tw->tw_txhash);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
			tcp_rsk(req)->txhash);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
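
/* The end_seq arithmetic above follows from SYN and FIN each consuming one
 * unit of sequence space: for a segment carrying L payload bytes,
 *
 *	end_seq = seq + syn + fin + L
 *
 * where L = skb->len - th->doff * 4 (doff counts 32-bit header words).
 */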

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		drop_reason = tcp_inbound_md5_hash(sk, skb,
						   &hdr->saddr, &hdr->daddr,
						   AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
					   AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
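
/* Illustrative /proc/net/tcp6 row shape produced by the seq_printf() above
 * (values made up, line wrapped for readability):
 *
 *   0: 00000000000000000000000001000000:0016 ...:0000 0A 00000000:00000000 ...
 *
 * Addresses are the raw s6_addr32 words in hex, ports are host-order hex,
 * and "0A" is the TCP_LISTEN state value (10).
 */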

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}